From 1f31183be48fe86d3dbb3015f20d5418903ff759 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Thu, 3 Feb 2022 12:11:33 +0100 Subject: [PATCH 001/100] init compute_factor_ee --- org/qmckl_jastrow.org | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index c70ef9f..a713a5e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1642,6 +1642,22 @@ integer function qmckl_compute_factor_ee_f(context, walk_num, elec_num, up_num, end function qmckl_compute_factor_ee_f #+end_src +#+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_factor_ee ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ) { + + + } +#+end_src + #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: From b0bfb3157c7ae9a34943fd9a8ce8b0d302c3a426 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Thu, 3 Feb 2022 17:10:31 +0100 Subject: [PATCH 002/100] translation completed; error at accessing ee_distance_rescaled --- org/qmckl_jastrow.org | 91 +++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index a713a5e..0f63118 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1642,7 +1642,7 @@ integer function qmckl_compute_factor_ee_f(context, walk_num, elec_num, up_num, end function qmckl_compute_factor_ee_f #+end_src -#+begin_src c :tangle (eval h_func) :comments org +#+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_ee ( const qmckl_context context, const int64_t walk_num, @@ -1654,8 +1654,56 @@ end function qmckl_compute_factor_ee_f const double* asymp_jasb, double* const factor_ee ) { + int64_t ipar; // can we use a smaller integer? + double pow_ser, x, spin_fact, power_ser; + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (bord_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + for (int nw = 0; nw < walk_num; ++nw) { + factor_ee[nw] = 0.0; // put init array here. + for (int j = 0; j < elec_num; ++j ) { + for (int i = 0; i < j; ++i) { + x = ee_distance_rescaled[nw][i][j]; + power_ser = 0.0; + spin_fact = 1.0; + ipar = 0; // index of asymp_jasb + + for (int p = 1; p < bord_num; ++p) { + x = x * ee_distance_rescaled[nw][i][j]; + power_ser = power_ser + bord_vector[p + 1] * x; + } + + if(j <= up_num || i > up_num) { + spin_fact = 0.5; + ipar = 1; + } + + factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ + ee_distance_rescaled[nw][i][j] / \ + (1.0 + bord_vector[1] * \ + ee_distance_rescaled[nw][i][j]) \ + -asymp_jasb[ipar] + power_ser; + + } } + } + + return QMCKL_SUCCESS; +} #+end_src #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -1677,47 +1725,6 @@ end function qmckl_compute_factor_ee_f #+CALL: generate_c_interface(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_ee & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - asymp_jasb, & - factor_ee) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: up_num - integer (c_int64_t) , intent(in) , value :: bord_num - real (c_double ) , intent(in) :: bord_vector(bord_num + 1) - real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) - real (c_double ) , intent(in) :: asymp_jasb(2) - real (c_double ) , intent(out) :: factor_ee(walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_ee_f - info = qmckl_compute_factor_ee_f & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - asymp_jasb, & - factor_ee) - - end function qmckl_compute_factor_ee - #+end_src *** Test #+begin_src python :results output :exports none :noweb yes From 81d55b4189daf7b5ca22444eb03b598e13cece14 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Thu, 3 Feb 2022 18:27:33 +0100 Subject: [PATCH 003/100] ee_distance_rescaled fixed;assert fails due to number mismatch --- org/qmckl_jastrow.org | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 0f63118..f47139f 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1654,7 +1654,7 @@ end function qmckl_compute_factor_ee_f const double* asymp_jasb, double* const factor_ee ) { - int64_t ipar; // can we use a smaller integer? + int ipar; // can we use a smaller integer? double pow_ser, x, spin_fact, power_ser; if (context == QMCKL_NULL_CONTEXT) { @@ -1677,13 +1677,13 @@ end function qmckl_compute_factor_ee_f factor_ee[nw] = 0.0; // put init array here. for (int j = 0; j < elec_num; ++j ) { for (int i = 0; i < j; ++i) { - x = ee_distance_rescaled[nw][i][j]; + x = ee_distance_rescaled[j + elec_num*(i + elec_num*nw)]; power_ser = 0.0; spin_fact = 1.0; ipar = 0; // index of asymp_jasb for (int p = 1; p < bord_num; ++p) { - x = x * ee_distance_rescaled[nw][i][j]; + x = x * ee_distance_rescaled[j + elec_num*(i + elec_num*nw)]; power_ser = power_ser + bord_vector[p + 1] * x; } @@ -1693,9 +1693,9 @@ end function qmckl_compute_factor_ee_f } factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ - ee_distance_rescaled[nw][i][j] / \ + ee_distance_rescaled[j + elec_num*(i + elec_num*nw)] / \ (1.0 + bord_vector[1] * \ - ee_distance_rescaled[nw][i][j]) \ + ee_distance_rescaled[j + elec_num*(i + elec_num*nw)]) \ -asymp_jasb[ipar] + power_ser; } From b8b2997382dd883aa80d330e6a2aefa2d5c323a1 Mon Sep 17 00:00:00 2001 From: v1j4y Date: Fri, 4 Feb 2022 17:13:15 +0100 Subject: [PATCH 004/100] Fixed indexing of ee_distance_rescaled. --- org/qmckl_jastrow.org | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index fd2c8a5..57628bf 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1624,7 +1624,7 @@ end function qmckl_compute_factor_ee_f double* const factor_ee ) { int ipar; // can we use a smaller integer? - double pow_ser, x, spin_fact, power_ser; + double pow_ser, x, x1, spin_fact, power_ser; if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -1646,13 +1646,14 @@ end function qmckl_compute_factor_ee_f factor_ee[nw] = 0.0; // put init array here. for (int j = 0; j < elec_num; ++j ) { for (int i = 0; i < j; ++i) { - x = ee_distance_rescaled[j + elec_num*(i + elec_num*nw)]; + x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + x1 = x; power_ser = 0.0; spin_fact = 1.0; ipar = 0; // index of asymp_jasb for (int p = 1; p < bord_num; ++p) { - x = x * ee_distance_rescaled[j + elec_num*(i + elec_num*nw)]; + x = x * x1; power_ser = power_ser + bord_vector[p + 1] * x; } @@ -1662,9 +1663,9 @@ end function qmckl_compute_factor_ee_f } factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ - ee_distance_rescaled[j + elec_num*(i + elec_num*nw)] / \ + x1 / \ (1.0 + bord_vector[1] * \ - ee_distance_rescaled[j + elec_num*(i + elec_num*nw)]) \ + x1) \ -asymp_jasb[ipar] + power_ser; } From 794ee5fe8c7a55c1c888c54c6f4df6f23a01cd7f Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Mon, 7 Feb 2022 12:17:44 +0100 Subject: [PATCH 005/100] remove generate interface --- org/qmckl_jastrow.org | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 57628bf..32e2fb4 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1652,7 +1652,7 @@ end function qmckl_compute_factor_ee_f spin_fact = 1.0; ipar = 0; // index of asymp_jasb - for (int p = 1; p < bord_num; ++p) { + for (int p = 1; p <= bord_num; ++p) { x = x * x1; power_ser = power_ser + bord_vector[p + 1] * x; } @@ -1693,7 +1693,6 @@ end function qmckl_compute_factor_ee_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) *** Test From 2332007a7ce21a5ab7c978e0363a10dceb60e96e Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Mon, 7 Feb 2022 13:53:50 +0100 Subject: [PATCH 006/100] fixed qmckl_compute_factor_ee --- org/qmckl_jastrow.org | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 32e2fb4..a55d85a 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1644,20 +1644,20 @@ end function qmckl_compute_factor_ee_f for (int nw = 0; nw < walk_num; ++nw) { factor_ee[nw] = 0.0; // put init array here. - for (int j = 0; j < elec_num; ++j ) { - for (int i = 0; i < j; ++i) { + for (int i = 0; i < elec_num; ++i ) { + for (int j = 0; j < i; ++j) { x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; x1 = x; power_ser = 0.0; spin_fact = 1.0; ipar = 0; // index of asymp_jasb - for (int p = 1; p <= bord_num; ++p) { + for (int p = 1; p < bord_num; ++p) { x = x * x1; power_ser = power_ser + bord_vector[p + 1] * x; } - if(j <= up_num || i > up_num) { + if(i < up_num || j >= up_num) { spin_fact = 0.5; ipar = 1; } From 05cfd10cc48fe728905de11127bce640ccdfe7de Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 16 Feb 2022 14:14:05 +0100 Subject: [PATCH 007/100] completed qmckl_compute_factor_en --- org/qmckl_jastrow.org | 115 +++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 47 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index c46fe63..a177d91 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -2442,6 +2442,74 @@ integer function qmckl_compute_factor_en_f(context, walk_num, elec_num, nucl_num end function qmckl_compute_factor_en_f #+end_src + + + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_factor_en ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ) { + + + int ipar; + double x, x1, spin_fact, power_ser; + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (aord_num <= 0) { + return QMCKL_INVALID_ARG_7; + } + + + for (int nw = 0; nw < walk_num; ++nw ) { + // init array + factor_en[nw] = 0.0; + for (int a = 0; a < nucl_num; ++a ) { + for (int i = 0; i < elec_num; ++i ) { + // x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + x = en_distance_rescaled[i * (walk_num * nucl_num) + a * (walk_num) + nw]; + x1 = x; + power_ser = 0.0; + + for (int p = 2; p < aord_num+1; ++p) { + x = x * x1; + power_ser = power_ser + aord_vector[(p+1)-1 + (type_nucl_vector[a]-1) * aord_num] * x; + } + + factor_en[nw] = factor_en[nw] + aord_vector[0 + (type_nucl_vector[a]-1)*aord_num] * x1 / \ + (1.0 + aord_vector[1 + (type_nucl_vector[a]-1) * aord_num] * x1) + \ + power_ser; + + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -2461,53 +2529,6 @@ end function qmckl_compute_factor_en_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_en & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - factor_en) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: type_nucl_num - integer (c_int64_t) , intent(in) :: type_nucl_vector(nucl_num) - integer (c_int64_t) , intent(in) , value :: aord_num - real (c_double ) , intent(in) :: aord_vector(aord_num + 1, type_nucl_num) - real (c_double ) , intent(in) :: en_distance_rescaled(elec_num, nucl_num, walk_num) - real (c_double ) , intent(out) :: factor_en(walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_en_f - info = qmckl_compute_factor_en_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - factor_en) - - end function qmckl_compute_factor_en - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np From bf8cec7f3cf39c3bb8b100675401304b60da4d8b Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 16 Feb 2022 18:09:02 +0100 Subject: [PATCH 008/100] reordered index in ee_distance_rescaled that makes sense --- org/qmckl_jastrow.org | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index a177d91..eabde1e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1735,7 +1735,8 @@ end function qmckl_compute_factor_ee_f factor_ee[nw] = 0.0; // put init array here. for (int i = 0; i < elec_num; ++i ) { for (int j = 0; j < i; ++j) { - x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + //x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + x = ee_distance_rescaled[j + i * elec_num + nw*(elec_num * elec_num)]; x1 = x; power_ser = 0.0; spin_fact = 1.0; From bb2e8384e8b8cde1bab3edef6bbc44c8949b0ba2 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 16 Feb 2022 18:13:57 +0100 Subject: [PATCH 009/100] fix reorder of nex function --- org/qmckl_jastrow.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index eabde1e..7b1f0b8 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -2490,7 +2490,7 @@ qmckl_exit_code qmckl_compute_factor_en ( for (int a = 0; a < nucl_num; ++a ) { for (int i = 0; i < elec_num; ++i ) { // x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; - x = en_distance_rescaled[i * (walk_num * nucl_num) + a * (walk_num) + nw]; + x = en_distance_rescaled[i + a * elec_num + nw * (elec_num * nucl_num)]; x1 = x; power_ser = 0.0; From 2427d1b56ebc836c603a8466ac6098fd2d8820f9 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Tue, 22 Feb 2022 14:52:21 +0100 Subject: [PATCH 010/100] working qmckl_compute_een_rescaled_n --- org/qmckl_jastrow.org | 106 +++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 42 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 7b1f0b8..9867010 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -3855,6 +3855,70 @@ integer function qmckl_compute_een_rescaled_n_f(context, walk_num, elec_num, nuc end function qmckl_compute_een_rescaled_n_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_een_rescaled_n ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ) { + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_5; + } + + // Prepare table of exponentiated distances raised to appropriate power + for (int i = 0; i < (walk_num*(cord_num+1)*nucl_num*elec_num); ++i) { + een_rescaled_n[i] = 17.0; + } + + for (int nw = 0; nw < walk_num; ++nw) { + for (int a = 0; a < nucl_num; ++a) { + for (int i = 0; i < elec_num; ++i) { + // prepare the actual een table + //een_rescaled_n(:, :, 0, nw) = 1.0d0 + een_rescaled_n[i + a * elec_num + 0 + nw * elec_num*nucl_num*(cord_num+1)] = 1.0; + //een_rescaled_n(i, a, 1, nw) = dexp(-rescale_factor_kappa_en * en_distance(i, a, nw)) + een_rescaled_n[i + a*elec_num + elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] = exp(-rescale_factor_kappa_en * \ + en_distance[i + a*elec_num + nw*elec_num*nucl_num]); + } + } + + for (int l = 2; l < (cord_num+1); ++l){ + for (int a = 0; a < nucl_num; ++a) { + for (int i = 0; i < elec_num; ++i) { + een_rescaled_n[i + a*elec_num + l*elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] = een_rescaled_n[i + a*elec_num + (l-1)*elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] *\ + een_rescaled_n[i + a*elec_num + elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)]; + } + } + } + + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -3870,47 +3934,6 @@ end function qmckl_compute_een_rescaled_n_f double* const een_rescaled_n ); #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_een_rescaled_n & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - en_distance, & - een_rescaled_n) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: cord_num - real (c_double ) , intent(in) , value :: rescale_factor_kappa_en - real (c_double ) , intent(in) :: en_distance(nucl_num,elec_num,walk_num) - real (c_double ) , intent(out) :: een_rescaled_n(nucl_num,elec_num,0:cord_num,walk_num) - - integer(c_int32_t), external :: qmckl_compute_een_rescaled_n_f - info = qmckl_compute_een_rescaled_n_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - en_distance, & - een_rescaled_n) - - end function qmckl_compute_een_rescaled_n - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes @@ -3969,7 +3992,6 @@ assert(fabs(een_rescaled_n[0][1][0][4]-0.023391817607642338) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][3]-0.880957224822116) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][4]-0.027185942659395074) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][5]-0.01343938025140174) < 1.e-12); - #+end_src ** Electron-nucleus rescaled distances for each order and derivatives From 4fac9f06c9f86e6072fac909c1132f403f6f6267 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Mon, 28 Feb 2022 17:47:24 +0100 Subject: [PATCH 011/100] to be checked --- org/qmckl_jastrow.org | 61 +++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index aac48b8..9674f47 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4962,6 +4962,45 @@ integer function qmckl_compute_dim_cord_vect_f(context, cord_num, dim_cord_vect) end function qmckl_compute_dim_cord_vect_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_dim_cord_vect ( + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect){ + + int lmax; + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + dim_cord_vect = 0; + + for (int p=2; p < cord_num; ++p){ + for (int k=p-1; k <= 0; --k) { + if (k != 0) { + lmax = p - k; + } else { + lmax = p - k - 2; + } + for (l = lmax; l <= 0; --l) { +// if ( iand(p - k - l, 1_8) == 1) continue; +// Does it make sense? it should + if ( ((p - k - l) & 1)==1) continue; + dim_cord_vect = dim_cord_vect + 1; + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -4973,28 +5012,6 @@ end function qmckl_compute_dim_cord_vect_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_dim_cord_vect & - (context, cord_num, dim_cord_vect) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(out) :: dim_cord_vect - - integer(c_int32_t), external :: qmckl_compute_dim_cord_vect_f - info = qmckl_compute_dim_cord_vect_f & - (context, cord_num, dim_cord_vect) - - end function qmckl_compute_dim_cord_vect - #+end_src - *** Compute cord_vect_full :PROPERTIES: :Name: qmckl_compute_cord_vect_full From d13693a822c22acff50d9b7b640fc94d4a71751c Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 2 Mar 2022 10:18:45 +0100 Subject: [PATCH 012/100] to be cleaned from print --- org/qmckl_jastrow.org | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 9674f47..518ddfb 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4979,23 +4979,28 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( return QMCKL_INVALID_ARG_2; } - dim_cord_vect = 0; + printf("hello %d\n", *dim_cord_vect); + *dim_cord_vect = 0; for (int p=2; p < cord_num; ++p){ + printf("in da loop\n"); for (int k=p-1; k <= 0; --k) { if (k != 0) { lmax = p - k; } else { lmax = p - k - 2; } - for (l = lmax; l <= 0; --l) { + printf("lmax %d\n", lmax); + for (int l = lmax; l <= 0; --l) { // if ( iand(p - k - l, 1_8) == 1) continue; // Does it make sense? it should if ( ((p - k - l) & 1)==1) continue; - dim_cord_vect = dim_cord_vect + 1; + printf("in da loop\n"); + *dim_cord_vect = *dim_cord_vect + 1; } } } + printf("hello 2 %d\n", *dim_cord_vect); return QMCKL_SUCCESS; } From a4ba5deac4a74fe48c8d928408d8d611f285826b Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 9 Mar 2022 11:15:15 +0100 Subject: [PATCH 013/100] compute_dim_cord_vect done --- org/qmckl_jastrow.org | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 518ddfb..74fe58e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4979,29 +4979,22 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( return QMCKL_INVALID_ARG_2; } - printf("hello %d\n", *dim_cord_vect); *dim_cord_vect = 0; - - for (int p=2; p < cord_num; ++p){ - printf("in da loop\n"); - for (int k=p-1; k <= 0; --k) { + + for (int p=2; p <= cord_num; ++p){ + for (int k=p-1; k >= 0; --k) { if (k != 0) { lmax = p - k; } else { lmax = p - k - 2; } - printf("lmax %d\n", lmax); - for (int l = lmax; l <= 0; --l) { -// if ( iand(p - k - l, 1_8) == 1) continue; -// Does it make sense? it should + for (int l = lmax; l >= 0; --l) { if ( ((p - k - l) & 1)==1) continue; - printf("in da loop\n"); - *dim_cord_vect = *dim_cord_vect + 1; + *dim_cord_vect=*dim_cord_vect+1; } } } - printf("hello 2 %d\n", *dim_cord_vect); - + return QMCKL_SUCCESS; } #+end_src From b222ee3156a273a8580964c1de03096bd3b1b5fc Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Mon, 14 Mar 2022 11:21:31 +0100 Subject: [PATCH 014/100] lkpm works --- org/qmckl_jastrow.org | 73 +++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 74fe58e..423f152 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5128,7 +5128,7 @@ end function qmckl_compute_cord_vect_full_f | ~context~ | ~qmckl_context~ | in | Global state | | ~cord_num~ | ~int64_t~ | in | Order of polynomials | | ~dim_cord_vect~ | ~int64_t~ | in | dimension of cord full table | - | ~lpkm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | out | Full list of combined indices | + | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | out | Full list of combined indices | #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_lkpm_combined_index_f(context, cord_num, dim_cord_vect, & @@ -5184,6 +5184,53 @@ integer function qmckl_compute_lkpm_combined_index_f(context, cord_num, dim_cord end function qmckl_compute_lkpm_combined_index_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_lkpm_combined_index ( + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ) { + + int kk, lmax, m; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (dim_cord_vect <= 0) { + return QMCKL_INVALID_ARG_3; + } + +/* +*/ + kk = 0; + for (int p = 2; p <= cord_num; ++p) { + for (int k=(p-1); k >= 0; --k) { + if (k != 0) { + lmax = p - k; + } else { + lmax = p - k - 2; + } + for (int l=lmax; l >= 0; --l) { + if (((p - k - l) & 1) == 1) continue; + m = (p - k - l)/2; + lkpm_combined_index[kk ] = l; + lkpm_combined_index[kk + dim_cord_vect] = k; + lkpm_combined_index[kk + 2*dim_cord_vect] = p; + lkpm_combined_index[kk + 3*dim_cord_vect] = m; + kk = kk + 1; + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -5192,32 +5239,10 @@ end function qmckl_compute_lkpm_combined_index_f const qmckl_context context, const int64_t cord_num, const int64_t dim_cord_vect, - int64_t* const lpkm_combined_index ); + int64_t* const lkpm_combined_index ); #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_lkpm_combined_index & - (context, cord_num, dim_cord_vect, lpkm_combined_index) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(in) , value :: dim_cord_vect - integer (c_int64_t) , intent(out) :: lpkm_combined_index(dim_cord_vect,4) - - integer(c_int32_t), external :: qmckl_compute_lkpm_combined_index_f - info = qmckl_compute_lkpm_combined_index_f & - (context, cord_num, dim_cord_vect, lpkm_combined_index) - - end function qmckl_compute_lkpm_combined_index - #+end_src *** Compute tmp_c :PROPERTIES: From 6b45157212c211f398dca3c2c99eeef859852029 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Thu, 17 Mar 2022 17:46:21 +0100 Subject: [PATCH 015/100] tmp_c done --- org/qmckl_jastrow.org | 94 ++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 27 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 423f152..4f777da 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5330,6 +5330,73 @@ integer function qmckl_compute_tmp_c_f(context, cord_num, elec_num, nucl_num, & end function qmckl_compute_tmp_c_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { + + qmckl_exit_code info; + int i, j, a, l, kk, p, lmax, nw; + char TransA, TransB; + double alpha, beta; + int M, N, K, LDA, LDB, LDC; + + TransA = 'N'; + TransB = 'N'; + alpha = 1.0; + beta = 0.0; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + M = elec_num; + N = nucl_num*(cord_num + 1); + K = elec_num; + + LDA = sizeof(een_rescaled_e)/sizeof(double); + LDB = sizeof(een_rescaled_n)/sizeof(double); + LDC = sizeof(tmp_c)/sizeof(double); + +// DOING + for (int nw=0; nw < walk_num; ++nw) { + for (int i=0; i Date: Thu, 17 Mar 2022 22:27:10 +0100 Subject: [PATCH 016/100] dtmp_c done --- org/qmckl_jastrow.org | 95 ++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 29 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 4f777da..9b0370c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5376,7 +5376,6 @@ qmckl_exit_code qmckl_compute_tmp_c ( LDB = sizeof(een_rescaled_n)/sizeof(double); LDC = sizeof(tmp_c)/sizeof(double); -// DOING for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i Date: Thu, 24 Mar 2022 10:06:25 +0100 Subject: [PATCH 017/100] Add detection of configure arguments to enable GPU offloading As of now, only OpenMP offload will be implemented as a test. --- configure.ac | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 56f0ed1..668fe2a 100644 --- a/configure.ac +++ b/configure.ac @@ -218,6 +218,29 @@ AS_IF([test "$HAVE_HPC" = "yes"], [ AC_DEFINE([HAVE_HPC], [1], [If defined, activate HPC routines]) ]) +# Enable Verificarlo tests +AC_ARG_ENABLE([vfc_ci], +[ --enable-vfc_ci Build the library with vfc_ci support], +[case "${enableval}" in + yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";; + no) vfc_ci=false ;; + *) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;; +esac],[vfc_ci=false]) +AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue]) + +if test "$FC" = "verificarlo-f"; then + AC_MSG_NOTICE(verificarlo-f detected) + # Arguments order is important here + FCFLAGS="-Mpreprocess $FCFLAGS" +fi + +# Enable GPU offloading +# OpenMP offloading +AC_ARG_ENABLE(openmp-offload, [AS_HELP_STRING([--openmp-offload],[Use OpenMP-offloaded functions])], HAVE_OPENMP_OFFLOAD=$enableval, HAVE_OPENMP_OFFLOAD=no) +AS_IF([test "$HAVE_OPENMP_OFFLOAD" = "yes"], [ + AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) +]) + AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) if test "$ok" = "yes"; then if test "$GCC" = "yes"; then @@ -313,21 +336,6 @@ if test "x${QMCKL_DEVEL}" != "x"; then fi -# Enable Verificarlo tests -AC_ARG_ENABLE([vfc_ci], -[ --enable-vfc_ci Build the library with vfc_ci support], -[case "${enableval}" in - yes) vfc_ci=true && FCFLAGS="-D VFC_CI $FCFLAGS" && CFLAGS="-D VFC_CI $CFLAGS";; - no) vfc_ci=false ;; - *) AC_MSG_ERROR([bad value ${enableval} for --enable_vfc_ci]) ;; -esac],[vfc_ci=false]) -AM_CONDITIONAL([VFC_CI], [test x$vfc_ci = xtrue]) - -if test "$FC" = "verificarlo-f"; then - AC_MSG_NOTICE(verificarlo-f detected) - # Arguments order is important here - FCFLAGS="-Mpreprocess $FCFLAGS" -fi #PKG-CONFIG #mkl-dynamic-lp64-seq @@ -363,6 +371,7 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} +OpenMP offload .: ${HAVE_OPENMP_OFFLOAD} Package features: ${ARGS} From 5e3231e7e39fd3f07bc68e7e9d8ad7875aa8dd47 Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Thu, 24 Mar 2022 16:35:29 +0100 Subject: [PATCH 018/100] Add selection mechanism for offload mode in Jastrow This system adds an additional field to the QMCkl context to store the offload mode currently in use for each kernel (in this commit, this has been implemented for Jastrow as an example). This will be useful to test different offloading versions that can be easily toggled on/off at compilation and at runtime. --- org/ao_grid.f90 | 114 ++++++++++++++++++ org/qmckl_jastrow.org | 269 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 org/ao_grid.f90 diff --git a/org/ao_grid.f90 b/org/ao_grid.f90 new file mode 100644 index 0000000..685313f --- /dev/null +++ b/org/ao_grid.f90 @@ -0,0 +1,114 @@ +subroutine qmckl_check_error(rc, message) + use qmckl + implicit none + integer(qmckl_exit_code), intent(in) :: rc + character(len=*) , intent(in) :: message + character(len=128) :: str_buffer + if (rc /= QMCKL_SUCCESS) then + print *, message + call qmckl_string_of_error(rc, str_buffer) + print *, str_buffer + call exit(rc) + end if +end subroutine qmckl_check_error + +program ao_grid + use qmckl + implicit none + + integer(qmckl_context) :: qmckl_ctx ! QMCkl context + integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions + + character(len=128) :: trexio_filename + character(len=128) :: str_buffer + integer :: ao_id + integer :: point_num_x + + integer(c_int64_t) :: nucl_num + double precision, allocatable :: nucl_coord(:,:) + + integer(c_int64_t) :: point_num + integer(c_int64_t) :: ao_num + integer(c_int64_t) :: ipoint, i, j, k + double precision :: x, y, z, dr(3) + double precision :: rmin(3), rmax(3) + double precision, allocatable :: points(:,:) + double precision, allocatable :: ao_vgl(:,:,:) + +if (iargc() /= 3) then + print *, 'Syntax: ao_grid ' + call exit(-1) +end if +call getarg(1, trexio_filename) +call getarg(2, str_buffer) +read(str_buffer, *) ao_id +call getarg(3, str_buffer) +read(str_buffer, *) point_num_x + +if (point_num_x < 0 .or. point_num_x > 300) then + print *, 'Error: 0 < point_num < 300' + call exit(-1) +end if + +qmckl_ctx = qmckl_context_create() +rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename))) +call qmckl_check_error(rc, 'Read TREXIO') + +rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) +call qmckl_check_error(rc, 'Getting ao_num') + +if (ao_id < 0 .or. ao_id > ao_num) then + print *, 'Error: 0 < ao_id < ', ao_num + call exit(-1) +end if + +rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num) +call qmckl_check_error(rc, 'Get nucleus num') + +allocate( nucl_coord(3, nucl_num) ) +rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num) +call qmckl_check_error(rc, 'Get nucleus coord') + +rmin(1) = minval( nucl_coord(1,:) ) - 5.d0 +rmin(2) = minval( nucl_coord(2,:) ) - 5.d0 +rmin(3) = minval( nucl_coord(3,:) ) - 5.d0 + +rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0 +rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0 +rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0 + +dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1) + +point_num = point_num_x**3 +allocate( points(point_num, 3) ) +ipoint=0 +z = rmin(3) +do k=1,point_num_x + y = rmin(2) + do j=1,point_num_x + x = rmin(1) + do i=1,point_num_x + ipoint = ipoint+1 + points(ipoint,1) = x + points(ipoint,2) = y + points(ipoint,3) = z + x = x + dr(1) + end do + y = y + dr(2) + end do + z = z + dr(3) +end do + +rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) +call qmckl_check_error(rc, 'Setting points') + +allocate( ao_vgl(ao_num, 5, point_num) ) +rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) +call qmckl_check_error(rc, 'Setting points') + +do ipoint=1, point_num + print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) +end do + +deallocate( nucl_coord, points, ao_vgl ) +end program ao_grid diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 61062af..6a2c2a2 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -327,7 +327,14 @@ kappa_inv = 1.0/kappa ** Data structure - #+begin_src c :comments org :tangle (eval h_private_type) +#+begin_src c :comments org :tangle (eval h_type) +typedef enum qmckl_jastrow_offload_type{ + OFFLOAD_NONE, + OFFLOAD_OPENMP +} qmckl_jastrow_offload_type; +#+end_src + +#+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_jastrow_struct{ int32_t uninitialized; int64_t aord_num; @@ -372,6 +379,7 @@ typedef struct qmckl_jastrow_struct{ uint64_t een_rescaled_n_deriv_e_date; bool provided; char * type; + qmckl_jastrow_offload_type offload_type; } qmckl_jastrow_struct; #+end_src @@ -416,6 +424,7 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max); +qmckl_exit_code qmckl_get_jastrow_offload_type (qmckl_context context, qmckl_jastrow_offload_type * const offload_type); #+end_src Along with these core functions, calculation of the jastrow factor @@ -713,6 +722,32 @@ qmckl_get_jastrow_cord_vector (const qmckl_context context, return QMCKL_SUCCESS; } +qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmckl_jastrow_offload_type* const offload_type) { + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return (char) 0; + } + + if (offload_type == NULL) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_get_jastrow_offload_type", + "offload_type is a null pointer"); + } + + qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + assert (ctx != NULL); + + int32_t mask = 1 << 0; + + if ( (ctx->jastrow.uninitialized & mask) != 0) { + return QMCKL_NOT_PROVIDED; + } + + *offload_type = ctx->jastrow.offload_type; + return QMCKL_SUCCESS; +} + #+end_src ** Initialization functions @@ -727,6 +762,7 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, con qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max); +qmckl_exit_code qmckl_set_jastrow_offload_type (qmckl_context context, const qmckl_jastrow_offload_type offload_type); #+end_src #+NAME:pre2 @@ -1063,6 +1099,14 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, <> } +qmckl_exit_code +qmckl_set_jastrow_offload_type(qmckl_context context, const qmckl_jastrow_offload_type offload_type) +{ +<> + ctx->jastrow.offload_type = offload_type; + return QMCKL_SUCCESS; +} + #+end_src When the required information is completely entered, other data structures are @@ -6093,6 +6137,30 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e; } + /* Choose the correct compute function (depending on offload type) */ + bool default_compute = true; + +#ifdef HAVE_OPENMP_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENMP) { + qmckl_exit_code rc = + qmckl_compute_factor_een_deriv_e_omp_offload(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.dtmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e, + ctx->jastrow.factor_een_deriv_e); + default_compute = false; + } +#endif + + if(default_compute) { qmckl_exit_code rc = qmckl_compute_factor_een_deriv_e(context, ctx->electron.walk_num, @@ -6107,6 +6175,8 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); + } + if (rc != QMCKL_SUCCESS) { return rc; } @@ -6507,6 +6577,203 @@ end function qmckl_compute_factor_een_deriv_e_f end function qmckl_compute_factor_een_deriv_e #+end_src +*** Compute (OpenMP offload)... + :PROPERTIES: + :Name: qmckl_compute_factor_een_deriv_e + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_een_deriv_e_omp_offload_args + | Variable | Type | In/Out | Description | + |--------------------------+---------------------------------------------------------------------+--------+------------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~cord_num~ | ~int64_t~ | in | order of polynomials | + | ~dim_cord_vect~ | ~int64_t~ | in | dimension of full coefficient vector | + | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | in | full coefficient vector | + | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | in | combined indices | + | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | in | Temporary intermediate tensor | + | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | in | vector of non-zero coefficients | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~een_rescaled_n_deriv_e~ | ~double[walk_num][0:cord_num][nucl_num][4][elec_num]~ | in | Derivative of Electron-nucleus rescaled factor | + | ~factor_een_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Derivative of Electron-nucleus jastrow | + + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +#ifdef HAVE_OPENMP_OFFLOAD +integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & + cord_vect_full, lkpm_combined_index, & + tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: walk_num, elec_num, cord_num, nucl_num, dim_cord_vect + integer*8 , intent(in) :: lkpm_combined_index(dim_cord_vect,4) + double precision , intent(in) :: cord_vect_full(nucl_num, dim_cord_vect) + double precision , intent(in) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision , intent(in) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n_deriv_e(elec_num, 4, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) + + integer*8 :: i, a, j, l, k, p, m, n, nw, ii + double precision :: accu, accu2, cn + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (walk_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_5 + return + endif + + factor_een_deriv_e = 0.0d0 + + do nw =1, walk_num + do n = 1, dim_cord_vect + l = lkpm_combined_index(n, 1) + k = lkpm_combined_index(n, 2) + p = lkpm_combined_index(n, 3) + m = lkpm_combined_index(n, 4) + + do a = 1, nucl_num + cn = cord_vect_full(a, n) + if(cn == 0.d0) cycle + + do ii = 1, 4 + do j = 1, elec_num + factor_een_deriv_e(j,ii,nw) = factor_een_deriv_e(j,ii,nw) + (& + tmp_c(j,a,m,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m+l,nw) + & + (dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + & + (dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + & + tmp_c(j,a,m+l,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m,nw) & + ) * cn + end do + end do + + cn = cn + cn + do j = 1, elec_num + factor_een_deriv_e(j,4,nw) = factor_een_deriv_e(j,4,nw) + (& + (dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m+l,nw) + & + (dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m+l,nw) + & + (dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m+l,nw) + & + (dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m ,nw) + & + (dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m ,nw) + & + (dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m ,nw) & + ) * cn + end do + end do + end do + end do + +end function qmckl_compute_factor_een_deriv_e_omp_offload_f +#endif + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org +#ifdef HAVE_OPENMP_OFFLOAD + qmckl_exit_code qmckl_compute_factor_een_deriv_e_omp_offload ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const int64_t dim_cord_vect, + const double* cord_vect_full, + const int64_t* lkpm_combined_index, + const double* tmp_c, + const double* dtmp_c, + const double* een_rescaled_n, + const double* een_rescaled_n_deriv_e, + double* const factor_een_deriv_e ); +#endif + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none +#ifdef HAVE_OPENMP_OFFLOAD + integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_omp_offload & + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + dim_cord_vect, & + cord_vect_full, & + lkpm_combined_index, & + tmp_c, & + dtmp_c, & + een_rescaled_n, & + een_rescaled_n_deriv_e, & + factor_een_deriv_e) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: walk_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: dim_cord_vect + real (c_double ) , intent(in) :: cord_vect_full(nucl_num,dim_cord_vect) + integer (c_int64_t) , intent(in) :: lkpm_combined_index(dim_cord_vect,4) + real (c_double ) , intent(in) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + real (c_double ) , intent(in) :: dtmp_c(elec_num,4,nucl_num,0:cord_num,0:cord_num-1,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) + + integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_omp_offload_f + info = qmckl_compute_factor_een_deriv_e_omp_offload_f & + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + dim_cord_vect, & + cord_vect_full, & + lkpm_combined_index, & + tmp_c, & + dtmp_c, & + een_rescaled_n, & + een_rescaled_n_deriv_e, & + factor_een_deriv_e) + + end function qmckl_compute_factor_een_deriv_e_omp_offload +#endif + #+end_src + *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np From bcc49ca31215e6bc6456bf30153395e2e5b6d697 Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Fri, 25 Mar 2022 13:03:35 +0100 Subject: [PATCH 019/100] Minor fixes to previous commit TODO Start modifying dedicated function to implement offloading Also, as of now, Fortran preprocessor flags should be passed manually, we need to manage this in the configure.ac in the future. For now, when using gfortran, you should pass FCFLAGS="-cpp -DWITH_OPENMP_OFFLOAD" to enable offloading. --- org/qmckl_jastrow.org | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 6a2c2a2..f97464b 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -6604,7 +6604,8 @@ end function qmckl_compute_factor_een_deriv_e_f #+begin_src f90 :comments org :tangle (eval f) :noweb yes #ifdef HAVE_OPENMP_OFFLOAD -integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & +! TODO Add some offload statements +integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & cord_vect_full, lkpm_combined_index, & tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & result(info) @@ -6715,8 +6716,7 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f #endif #+end_src - - #+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none From 383c6ac78af4eaa0b4dc869aba9f4b80c9aad5d7 Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Mon, 28 Mar 2022 07:58:01 +0200 Subject: [PATCH 020/100] Add OFFLOAD_FLAGS, OFFLOAD_CFLAGS and OFFLOAD_FCFLAGS vars to configure --- configure.ac | 3 +++ org/qmckl_jastrow.org | 2 ++ 2 files changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 668fe2a..2993873 100644 --- a/configure.ac +++ b/configure.ac @@ -235,10 +235,13 @@ if test "$FC" = "verificarlo-f"; then fi # Enable GPU offloading + # OpenMP offloading AC_ARG_ENABLE(openmp-offload, [AS_HELP_STRING([--openmp-offload],[Use OpenMP-offloaded functions])], HAVE_OPENMP_OFFLOAD=$enableval, HAVE_OPENMP_OFFLOAD=no) AS_IF([test "$HAVE_OPENMP_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) + CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" + FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENMP_OFFLOAD $FCFLAGS" ]) AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index f97464b..cf0903c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -6654,6 +6654,7 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu factor_een_deriv_e = 0.0d0 do nw =1, walk_num + !$omp target do n = 1, dim_cord_vect l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) @@ -6688,6 +6689,7 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu end do end do end do + !$omp end target end do end function qmckl_compute_factor_een_deriv_e_omp_offload_f From 99306473a4cb418c81faf9656ccf8ec40c34bbea Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Wed, 30 Mar 2022 09:01:32 +0200 Subject: [PATCH 021/100] Start OpenACC implementation in Jastro, including compute_dtmp_c --- configure.ac | 12 +-- org/qmckl_jastrow.org | 240 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 214 insertions(+), 38 deletions(-) diff --git a/configure.ac b/configure.ac index 2993873..8bd6747 100644 --- a/configure.ac +++ b/configure.ac @@ -236,12 +236,12 @@ fi # Enable GPU offloading -# OpenMP offloading -AC_ARG_ENABLE(openmp-offload, [AS_HELP_STRING([--openmp-offload],[Use OpenMP-offloaded functions])], HAVE_OPENMP_OFFLOAD=$enableval, HAVE_OPENMP_OFFLOAD=no) -AS_IF([test "$HAVE_OPENMP_OFFLOAD" = "yes"], [ - AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) +# OpenACC offloading +AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) +AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ + AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" - FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENMP_OFFLOAD $FCFLAGS" + FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) @@ -374,7 +374,7 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} -OpenMP offload .: ${HAVE_OPENMP_OFFLOAD} +OpenACC offload : ${HAVE_OPENACC_OFFLOAD} Package features: ${ARGS} diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index cf0903c..70e1a8b 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -330,7 +330,7 @@ kappa_inv = 1.0/kappa #+begin_src c :comments org :tangle (eval h_type) typedef enum qmckl_jastrow_offload_type{ OFFLOAD_NONE, - OFFLOAD_OPENMP + OFFLOAD_OPENACC } qmckl_jastrow_offload_type; #+end_src @@ -4851,7 +4851,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4863,8 +4863,13 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = - qmckl_compute_dtmp_c(context, + /* Choose the correct compute function (depending on offload type) */ + bool default_compute = true; + + #ifdef HAVE_OPENACC_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { + qmckl_exit_code rc = + qmckl_compute_dtmp_c_acc_offload(context, ctx->jastrow.cord_num, ctx->electron.num, ctx->nucleus.num, @@ -4872,8 +4877,26 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - if (rc != QMCKL_SUCCESS) { - return rc; + default_compute = false; + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + #endif + + if(default_compute) { + qmckl_exit_code rc = + qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } } ctx->jastrow.dtmp_c_date = ctx->date; @@ -5439,6 +5462,156 @@ end function qmckl_compute_dtmp_c_f end function qmckl_compute_dtmp_c #+end_src +*** Compute dtmp_c (OpenACC offload) + :PROPERTIES: + :Name: qmckl_compute_dtmp_c_acc_offload + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_dtmp_c_acc_offload_args + | Variable | Type | In/Out | Description | + |--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_dtmp_c_acc_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e_deriv_e(elec_num, 4, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: x, tmp + integer*8 :: i, j, jj, k2, a, l, kk, p, lmax, nw, ii + character :: TransA, TransB + double precision :: alpha, beta + integer*8 :: M, N, K, LDA, LDB, LDC + + TransA = 'N' + TransB = 'N' + alpha = 1.0d0 + beta = 0.0d0 + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + M = 4*elec_num + N = nucl_num*(cord_num + 1) + K = elec_num + LDA = 4*size(een_rescaled_e_deriv_e,1) + LDB = size(een_rescaled_n,1) + LDC = 4*size(dtmp_c,1) + + do nw=1, walk_num + do i=0, cord_num-1 + + ! Single DGEMM + do j=0,cord_num + do jj=1,nucl_num + do k2=1,4 + do kk=1,elec_num + + tmp = 0.0 + do l=1,K + tmp = tmp + & + een_rescaled_e_deriv_e(kk, k2, l, i, nw) * een_rescaled_n(l, jj, j, nw) + enddo + ! affect tmp + dtmp_c(kk, k2, jj, j, i, nw ) = tmp + + enddo + enddo + enddo + enddo + + + !info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & + ! een_rescaled_e_deriv_e(1,1,1,i,nw),LDA*1_8, & + ! een_rescaled_n(1,1,0,nw),LDB*1_8, & + ! beta, & + ! dtmp_c(1,1,1,0,i,nw),LDC) + end do + end do + +end function qmckl_compute_dtmp_c_acc_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_dtmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_dtmp_c_acc_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e_deriv_e, + const double* een_rescaled_n, + double* const dtmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_dtmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_dtmp_c_acc_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e_deriv_e(elec_num,4,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: dtmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_dtmp_c_f + info = qmckl_compute_dtmp_c_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) + + end function qmckl_compute_dtmp_c_acc_offload + #+end_src + *** Test #+name: helper_funcs @@ -6140,10 +6313,10 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) /* Choose the correct compute function (depending on offload type) */ bool default_compute = true; -#ifdef HAVE_OPENMP_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_OPENMP) { + #ifdef HAVE_OPENACC_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e_omp_offload(context, + qmckl_compute_factor_een_deriv_e_acc_offload(context, ctx->electron.walk_num, ctx->electron.num, ctx->nucleus.num, @@ -6157,8 +6330,11 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); default_compute = false; + if (rc != QMCKL_SUCCESS) { + return rc; + } } -#endif + #endif if(default_compute) { qmckl_exit_code rc = @@ -6175,10 +6351,9 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); - } - - if (rc != QMCKL_SUCCESS) { - return rc; + if (rc != QMCKL_SUCCESS) { + return rc; + } } ctx->jastrow.factor_een_deriv_e_date = ctx->date; @@ -6577,14 +6752,14 @@ end function qmckl_compute_factor_een_deriv_e_f end function qmckl_compute_factor_een_deriv_e #+end_src -*** Compute (OpenMP offload)... +*** Compute (OpenACC offload) :PROPERTIES: :Name: qmckl_compute_factor_een_deriv_e :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: - #+NAME: qmckl_factor_een_deriv_e_omp_offload_args + #+NAME: qmckl_factor_een_deriv_e_acc_offload_args | Variable | Type | In/Out | Description | |--------------------------+---------------------------------------------------------------------+--------+------------------------------------------------| | ~context~ | ~qmckl_context~ | in | Global state | @@ -6603,9 +6778,8 @@ end function qmckl_compute_factor_een_deriv_e_f #+begin_src f90 :comments org :tangle (eval f) :noweb yes -#ifdef HAVE_OPENMP_OFFLOAD -! TODO Add some offload statements -integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & +#ifdef HAVE_OPENACC_OFFLOAD +integer function qmckl_compute_factor_een_deriv_e_acc_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & cord_vect_full, lkpm_combined_index, & tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & result(info) @@ -6653,8 +6827,8 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu factor_een_deriv_e = 0.0d0 + !$acc parallel do nw =1, walk_num - !$omp target do n = 1, dim_cord_vect l = lkpm_combined_index(n, 1) k = lkpm_combined_index(n, 2) @@ -6665,6 +6839,7 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu cn = cord_vect_full(a, n) if(cn == 0.d0) cycle + !$acc loop collapse(2) do ii = 1, 4 do j = 1, elec_num factor_een_deriv_e(j,ii,nw) = factor_een_deriv_e(j,ii,nw) + (& @@ -6677,6 +6852,8 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu end do cn = cn + cn + + !$acc loop do j = 1, elec_num factor_een_deriv_e(j,4,nw) = factor_een_deriv_e(j,4,nw) + (& (dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m+l,nw) + & @@ -6689,19 +6866,18 @@ integer function qmckl_compute_factor_een_deriv_e_omp_offload_f(context, walk_nu end do end do end do - !$omp end target end do - -end function qmckl_compute_factor_een_deriv_e_omp_offload_f + !$acc end parallel +end function qmckl_compute_factor_een_deriv_e_acc_offload_f #endif #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org -#ifdef HAVE_OPENMP_OFFLOAD - qmckl_exit_code qmckl_compute_factor_een_deriv_e_omp_offload ( +#ifdef HAVE_OPENACC_OFFLOAD + qmckl_exit_code qmckl_compute_factor_een_deriv_e_acc_offload ( const qmckl_context context, const int64_t walk_num, const int64_t elec_num, @@ -6718,12 +6894,12 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f #endif #+end_src -#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_omp_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -#ifdef HAVE_OPENMP_OFFLOAD - integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_omp_offload & +#ifdef HAVE_OPENACC_OFFLOAD + integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_acc_offload & (context, & walk_num, & elec_num, & @@ -6756,8 +6932,8 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f real (c_double ) , intent(in) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) real (c_double ) , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) - integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_omp_offload_f - info = qmckl_compute_factor_een_deriv_e_omp_offload_f & + integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_acc_offload_f + info = qmckl_compute_factor_een_deriv_e_acc_offload_f & (context, & walk_num, & elec_num, & @@ -6772,7 +6948,7 @@ end function qmckl_compute_factor_een_deriv_e_omp_offload_f een_rescaled_n_deriv_e, & factor_een_deriv_e) - end function qmckl_compute_factor_een_deriv_e_omp_offload + end function qmckl_compute_factor_een_deriv_e_acc_offload #endif #+end_src From 9428eaa19e628906ae456fc57680e41652210b41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Wed, 30 Mar 2022 16:16:06 +0200 Subject: [PATCH 022/100] Implement computation of tmp_c and dtmp_c in OpenACC These 2 kernels seem to give good speedup compared to the CPU BLAS versions. However, the current GPU implementation of factor_een_deriv seems to be slightly slower (on the tested machine). TODO: - Try to improve factor_een_deriv GPU implem - Try out a cuBLAS implementation of tmp_c and dtmp_c --- org/qmckl_jastrow.org | 235 +++++++++++++++++++++++++++++++++--------- 1 file changed, 184 insertions(+), 51 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 70e1a8b..ccf0c4e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4809,19 +4809,41 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } + /* Choose the correct compute function (depending on offload type) */ + bool default_compute = true; - qmckl_exit_code rc = - qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - if (rc != QMCKL_SUCCESS) { - return rc; + #ifdef HAVE_OPENACC_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { + qmckl_exit_code rc = + qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } } + #endif + + if(default_compute) { + qmckl_exit_code rc = + qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + ctx->jastrow.tmp_c_date = ctx->date; } @@ -5332,6 +5354,134 @@ end function qmckl_compute_tmp_c_f end function qmckl_compute_tmp_c #+end_src +*** Compute tmp_c (OpenACC offload) + :PROPERTIES: + :Name: qmckl_compute_tmp_c + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_tmp_c_acc_offload_args + | Variable | Type | In/Out | Description | + |------------------+------------------------------------------------------------------+--------+-----------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_tmp_c_acc_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e(elec_num, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: tmp + integer*8 :: i, j, jj, k, l, p, lmax, nw + + + info = QMCKL_SUCCESS + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + !$acc parallel + !$acc loop independent gang worker vector collapse(5) + do nw=1, walk_num + do i=0, cord_num-1 + + do j=0,cord_num + do jj=1,nucl_num + do k=1,elec_num + + tmp = 0.0 + do l=1,elec_num + tmp = tmp + & + een_rescaled_e(k, l, i, nw) * een_rescaled_n(l, jj, j, nw) + end do + tmp_c(k, jj, j, i, nw) = tmp + + end do + end do + end do + + end do + end do + !$acc end parallel + + +end function qmckl_compute_tmp_c_acc_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_tmp_c_acc_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e(elec_num,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_tmp_c_acc_offload_f + info = qmckl_compute_tmp_c_acc_offload_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) + + end function qmckl_compute_tmp_c_acc_offload + #+end_src *** Compute dtmp_c :PROPERTIES: @@ -5495,19 +5645,10 @@ integer function qmckl_compute_dtmp_c_acc_offload_f(context, cord_num, elec_num, double precision , intent(in) :: een_rescaled_e_deriv_e(elec_num, 4, elec_num, 0:cord_num, walk_num) double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) double precision , intent(out) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) - double precision :: x, tmp - integer*8 :: i, j, jj, k2, a, l, kk, p, lmax, nw, ii - character :: TransA, TransB - double precision :: alpha, beta - integer*8 :: M, N, K, LDA, LDB, LDC - - TransA = 'N' - TransB = 'N' - alpha = 1.0d0 - beta = 0.0d0 + double precision :: tmp + integer*8 :: nw, i, j, jj, k, kk, l info = QMCKL_SUCCESS - if (context == QMCKL_NULL_CONTEXT) then info = QMCKL_INVALID_CONTEXT return @@ -5528,44 +5669,32 @@ integer function qmckl_compute_dtmp_c_acc_offload_f(context, cord_num, elec_num, return endif - M = 4*elec_num - N = nucl_num*(cord_num + 1) - K = elec_num - LDA = 4*size(een_rescaled_e_deriv_e,1) - LDB = size(een_rescaled_n,1) - LDC = 4*size(dtmp_c,1) - + !$acc parallel + !$acc loop independent gang worker vector collapse(6) do nw=1, walk_num do i=0, cord_num-1 - ! Single DGEMM - do j=0,cord_num + do j=0,cord_num do jj=1,nucl_num - do k2=1,4 + do k=1,4 do kk=1,elec_num tmp = 0.0 - do l=1,K + do l=1,elec_num tmp = tmp + & - een_rescaled_e_deriv_e(kk, k2, l, i, nw) * een_rescaled_n(l, jj, j, nw) - enddo - ! affect tmp - dtmp_c(kk, k2, jj, j, i, nw ) = tmp + een_rescaled_e_deriv_e(kk, k, l, i, nw) * een_rescaled_n(l, jj, j, nw) + end do + dtmp_c(kk, k, jj, j, i, nw ) = tmp - enddo - enddo - enddo - enddo - - - !info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & - ! een_rescaled_e_deriv_e(1,1,1,i,nw),LDA*1_8, & - ! een_rescaled_n(1,1,0,nw),LDB*1_8, & - ! beta, & - ! dtmp_c(1,1,1,0,i,nw),LDC) + end do + end do end do end do + end do + end do + !$acc end parallel + end function qmckl_compute_dtmp_c_acc_offload_f #+end_src @@ -5605,8 +5734,8 @@ end function qmckl_compute_dtmp_c_acc_offload_f real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) real (c_double ) , intent(out) :: dtmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) - integer(c_int32_t), external :: qmckl_compute_dtmp_c_f - info = qmckl_compute_dtmp_c_f & + integer(c_int32_t), external :: qmckl_compute_dtmp_c_acc_offload_f + info = qmckl_compute_dtmp_c_acc_offload_f & (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) end function qmckl_compute_dtmp_c_acc_offload @@ -6316,7 +6445,11 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) #ifdef HAVE_OPENACC_OFFLOAD if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e_acc_offload(context, + // CPU version + qmckl_compute_factor_een_deriv_e(context, + + // GPU version : No speedup on this kernel yet + // qmckl_compute_factor_een_deriv_e_acc_offload(context, ctx->electron.walk_num, ctx->electron.num, ctx->nucleus.num, From 26bbd6f3412aedac48c55d1a9fbc7325df74d568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Fri, 1 Apr 2022 09:19:56 +0200 Subject: [PATCH 023/100] Start work on cuBLAS implementation TODO Replace CPU BLAS calls by cuBLAS calls (will probably require to write a Fortran to the functions we're interested in, at least DGEMMs) --- org/qmckl_jastrow.org | 297 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 296 insertions(+), 1 deletion(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ccf0c4e..8e2a00c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -330,7 +330,8 @@ kappa_inv = 1.0/kappa #+begin_src c :comments org :tangle (eval h_type) typedef enum qmckl_jastrow_offload_type{ OFFLOAD_NONE, - OFFLOAD_OPENACC + OFFLOAD_OPENACC, + OFFLOAD_CUBLAS } qmckl_jastrow_offload_type; #+end_src @@ -4829,6 +4830,23 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } #endif + #ifdef HAVE_CUBLAS_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) { + qmckl_exit_code rc = + qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + #endif + if(default_compute) { qmckl_exit_code rc = qmckl_compute_tmp_c(context, @@ -4906,6 +4924,24 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) } #endif + #ifdef HAVE_CUBLAS_OFFLOAD + if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) { + qmckl_exit_code rc = + qmckl_compute_dtmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + default_compute = false; + if (rc != QMCKL_SUCCESS) { + return rc; + } + } + #endif + if(default_compute) { qmckl_exit_code rc = qmckl_compute_dtmp_c(context, @@ -5483,6 +5519,137 @@ end function qmckl_compute_tmp_c_acc_offload_f end function qmckl_compute_tmp_c_acc_offload #+end_src +*** Compute tmp_c (cuBLAS offload) + :PROPERTIES: + :Name: qmckl_compute_tmp_c_cublas_offload + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_tmp_c_cublas_offload_args + | Variable | Type | In/Out | Description | + |------------------+------------------------------------------------------------------+--------+-----------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | in | Electron-electron rescaled factor | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_tmp_c_cublas_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e(elec_num, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: x + integer*8 :: i, j, a, l, kk, p, lmax, nw + character :: TransA, TransB + double precision :: alpha, beta + integer*8 :: M, N, K, LDA, LDB, LDC + + TransA = 'N' + TransB = 'N' + alpha = 1.0d0 + beta = 0.0d0 + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + M = elec_num + N = nucl_num*(cord_num + 1) + K = elec_num + LDA = size(een_rescaled_e,1) + LDB = size(een_rescaled_n,1) + LDC = size(tmp_c,1) + + ! Alloc and copy memory on device + + do nw=1, walk_num + do i=0, cord_num-1 + info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & + een_rescaled_e(1,1,i,nw),LDA*1_8, & + een_rescaled_n(1,1,0,nw),LDB*1_8, & + beta, & + tmp_c(1,1,0,i,nw),LDC) + end do + end do + +end function qmckl_compute_tmp_c_cublas_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_tmp_c_cublas_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_tmp_c_cublas_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e(elec_num,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_tmp_c_cublas_offload_f + info = qmckl_compute_tmp_c_cublas_offload_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) + + end function qmckl_compute_tmp_c_cublas_offload + #+end_src + *** Compute dtmp_c :PROPERTIES: :Name: qmckl_compute_dtmp_c @@ -5740,6 +5907,134 @@ end function qmckl_compute_dtmp_c_acc_offload_f end function qmckl_compute_dtmp_c_acc_offload #+end_src +*** Compute dtmp_c (cuBLAS offload) + :PROPERTIES: + :Name: qmckl_compute_dtmp_c + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_factor_dtmp_c_cublas_offload_args + | Variable | Type | In/Out | Description | + |--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~cord_num~ | ~int64_t~ | in | Order of polynomials | + | ~elec_num~ | ~int64_t~ | in | Number of electrons | + | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | + | ~walk_num~ | ~int64_t~ | in | Number of walkers | + | ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives | + | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | + | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_dtmp_c_cublas_offload_f(context, cord_num, elec_num, nucl_num, & + walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: cord_num + integer*8 , intent(in) :: elec_num + integer*8 , intent(in) :: nucl_num + integer*8 , intent(in) :: walk_num + double precision , intent(in) :: een_rescaled_e_deriv_e(elec_num, 4, elec_num, 0:cord_num, walk_num) + double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) + double precision , intent(out) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) + double precision :: x + integer*8 :: i, j, a, l, kk, p, lmax, nw, ii + character :: TransA, TransB + double precision :: alpha, beta + integer*8 :: M, N, K, LDA, LDB, LDC + + TransA = 'N' + TransB = 'N' + alpha = 1.0d0 + beta = 0.0d0 + + info = QMCKL_SUCCESS + + if (context == QMCKL_NULL_CONTEXT) then + info = QMCKL_INVALID_CONTEXT + return + endif + + if (cord_num <= 0) then + info = QMCKL_INVALID_ARG_2 + return + endif + + if (elec_num <= 0) then + info = QMCKL_INVALID_ARG_3 + return + endif + + if (nucl_num <= 0) then + info = QMCKL_INVALID_ARG_4 + return + endif + + M = 4*elec_num + N = nucl_num*(cord_num + 1) + K = elec_num + LDA = 4*size(een_rescaled_e_deriv_e,1) + LDB = size(een_rescaled_n,1) + LDC = 4*size(dtmp_c,1) + + do nw=1, walk_num + do i=0, cord_num-1 + info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & + een_rescaled_e_deriv_e(1,1,1,i,nw),LDA*1_8, & + een_rescaled_n(1,1,0,nw),LDB*1_8, & + beta, & + dtmp_c(1,1,1,0,i,nw),LDC) + end do + end do + +end function qmckl_compute_dtmp_c_cublas_offload_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_dtmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e_deriv_e, + const double* een_rescaled_n, + double* const dtmp_c ); + #+end_src + + + #+CALL: generate_c_interface(table=qmckl_factor_dtmp_c_cublas_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_dtmp_c_cublas_offload & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e_deriv_e(elec_num,4,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: dtmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_dtmp_c_cublas_offload_f + info = qmckl_compute_dtmp_c_cublas_offload_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) + + end function qmckl_compute_dtmp_c_cublas_offload + #+end_src *** Test From 1173bb2586ad8938eede917595d216b793a508a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Fri, 1 Apr 2022 17:56:27 +0200 Subject: [PATCH 024/100] Update configure.ac with cuBLAS support (forgotten in last commit) --- configure.ac | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 8bd6747..d2ac039 100644 --- a/configure.ac +++ b/configure.ac @@ -234,7 +234,7 @@ if test "$FC" = "verificarlo-f"; then FCFLAGS="-Mpreprocess $FCFLAGS" fi -# Enable GPU offloading +## Enable GPU offloading # OpenACC offloading AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) @@ -244,6 +244,21 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) +# cuBLAS offloading +AC_ARG_ENABLE(cublas-offload, [AS_HELP_STRING([--cublas-offload],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) +AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ + AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) + FCFLAGS="-DHAVE_CUBLAS_OFFLOAD" +]) + +# General offload +AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes" || test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ + CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" + FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS $FCFLAGS" +]) + +## + AC_ARG_ENABLE(debug, [AS_HELP_STRING([--enable-debug],[compile for debugging])], ok=$enableval, ok=no) if test "$ok" = "yes"; then if test "$GCC" = "yes"; then @@ -374,7 +389,8 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} -OpenACC offload : ${HAVE_OPENACC_OFFLOAD} +OpenACC offload.: ${HAVE_OPENACC_OFFLOAD} +cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD} Package features: ${ARGS} From 84013a5f760cd044e43d3144671a0a266cb65e04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Mon, 4 Apr 2022 12:12:11 +0200 Subject: [PATCH 025/100] Cleanup before merging into QMCkl's GPU branch --- org/qmckl_blas.org | 92 +++++++++++++++++++++++++++++++++++++++++++ org/qmckl_jastrow.org | 2 +- 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 9cd7e18..0a83b34 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -2288,6 +2288,98 @@ qmckl_transpose (qmckl_context context, #+end_src +* cuBLAS interface (optional) +We propose a cuBLAS version of some QMCkl kernels. However, because cuBLAS is written in C, we need to define a Fortran interface for it. We start by defining functions to manage the cuBLAS handle structure from Fortran, before writing interfaces for the specific cuBLAS functions we are interested in. + +TODO These are the C functions that are supposed to be called from Fortran. We still need to write the interfaces themselves. + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +#include +#endif +#+end_src + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +cublasHandle_t* get_cublas_handle_interfaced(); +#endif +#+end_src + +#+begin_src c :comments org :tangle (eval c) :exports none +#ifdef HAVE_CUBLAS_OFFLOAD +cublasHandle_t* get_cublas_handle_interfaced() { + cublasHandle_t* handle = malloc(sizeof(cublasHandle_t)); + + cublasStatus_t status = cublasCreate(handle); + if (status != CUBLAS_STATUS_SUCCESS){ + fprintf(stderr, "Error while initializing cuBLAS\n"); + exit(1); + } + + return handle; +} +#endif +#+end_src + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +void destroy_cublas_handle_interfaced(cublasHandle_t* handle); +#endif +#+end_src + +#+begin_src c :comments org :tangle (eval c) :exports none +#ifdef HAVE_CUBLAS_OFFLOAD +void destroy_cublas_handle_interfaced(cublasHandle_t* handle) { + if(handle != NULL) { + free(handle); + } +} +#endif +#+end_src + +** DGEMM + +#+begin_src c :tangle (eval h_private_func) :comments org +#ifdef HAVE_CUBLAS_OFFLOAD +cublasStatus_t cublasDgemm_f( + cublasHandle_t* handle, + cublasOperation_t* transa, cublasOperation_t* transb, + int* m, int* n, int* k, + const double* alpha, + const double*A, int* lda, + const double* B, int* ldb, + const double* beta, + double*C, int* ldc +); +#endif +#+end_src + +#+begin_src c :comments org :tangle (eval c) :exports none + +#ifdef HAVE_CUBLAS_OFFLOAD +cublasStatus_t cublasDgemm_f( + cublasHandle_t* handle, + cublasOperation_t* transa, cublasOperation_t* transb, + int* m, int* n, int* k, + const double* alpha, + const double*A, int* lda, + const double* B, int* ldb, + const double* beta, + double*C, int* ldc +) { + return cublasDgemm_f( + handle, + transa, transb, + m, n, k, + alpha, A, lda, B,ldb, + beta, C, ldc + ); +} +#endif +#+end_src + + + * End of files :noexport: diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 8e2a00c..e1a7a18 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5592,7 +5592,7 @@ integer function qmckl_compute_tmp_c_cublas_offload_f(context, cord_num, elec_nu LDB = size(een_rescaled_n,1) LDC = size(tmp_c,1) - ! Alloc and copy memory on device + ! TODO Replace with calls to cuBLAS do nw=1, walk_num do i=0, cord_num-1 From 31a05c47e2de79162743a2feb4b04f0f3681d630 Mon Sep 17 00:00:00 2001 From: hoffer Date: Mon, 4 Apr 2022 12:41:00 +0200 Subject: [PATCH 026/100] Add flags for nvc and nvfortran to support offload --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index fdae5dc..c746a41 100644 --- a/configure.ac +++ b/configure.ac @@ -201,11 +201,11 @@ AS_IF([test "$BLAS_LIBS" == "$LAPACK_LIBS"], [BLAS_LIBS=""]) case $FC in *ifort*) - FCFLAGS="$FCFLAGS -nofor-main" + FCFLAGS="$FCFLAGS -nofor-main -mp -target=gpu" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -fPIC -Mnomain" + FCFLAGS="$FCFLAGS -fPIC -Mnomain -mp -target=gpu" ;; esac From 1f9ea610d44265813dd89bd31fe2a35564da9d42 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 4 Apr 2022 16:56:33 +0200 Subject: [PATCH 027/100] Moved C version of Jastrow into HPC --- org/qmckl_jastrow.org | 349 +++++++++++++++++++++++++++++++----------- 1 file changed, 256 insertions(+), 93 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 9b0370c..d4eaaa8 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1727,7 +1727,7 @@ integer function qmckl_compute_factor_ee_f(context, walk_num, elec_num, up_num, double precision , intent(out) :: factor_ee(walk_num) integer*8 :: i, j, p, ipar, nw - double precision :: pow_ser, x, spin_fact, power_ser + double precision :: x, power_ser, spin_fact info = QMCKL_SUCCESS @@ -1766,7 +1766,7 @@ integer function qmckl_compute_factor_ee_f(context, walk_num, elec_num, up_num, power_ser = power_ser + bord_vector(p + 1) * x end do - if(j .LE. up_num .OR. i .GT. up_num) then + if(j <= up_num .OR. i > up_num) then spin_fact = 0.5d0 ipar = 2 endif @@ -1785,7 +1785,7 @@ end function qmckl_compute_factor_ee_f #+end_src #+begin_src c :comments org :tangle (eval c) :noweb yes - qmckl_exit_code qmckl_compute_factor_ee ( +qmckl_exit_code qmckl_compute_factor_ee ( const qmckl_context context, const int64_t walk_num, const int64_t elec_num, @@ -1797,7 +1797,7 @@ end function qmckl_compute_factor_ee_f double* const factor_ee ) { int ipar; // can we use a smaller integer? - double pow_ser, x, x1, spin_fact, power_ser; + double x, x1, spin_fact, power_ser; if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -2493,8 +2493,8 @@ integer function qmckl_compute_factor_en_f(context, walk_num, elec_num, nucl_num double precision , intent(in) :: en_distance_rescaled(elec_num, nucl_num, walk_num) double precision , intent(out) :: factor_en(walk_num) - integer*8 :: i, a, p, ipar, nw - double precision :: x, spin_fact, power_ser + integer*8 :: i, a, p, nw + double precision :: x, power_ser info = QMCKL_SUCCESS @@ -2563,10 +2563,7 @@ qmckl_exit_code qmckl_compute_factor_en ( const double* en_distance_rescaled, double* const factor_en ) { - - int ipar; - double x, x1, spin_fact, power_ser; - + double x, x1, power_ser; if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -2584,10 +2581,30 @@ qmckl_exit_code qmckl_compute_factor_en ( return QMCKL_INVALID_ARG_4; } + if (type_nucl_num <= 0) { + return QMCKL_INVALID_ARG_5; + } + + if (type_nucl_vector == NULL) { + return QMCKL_INVALID_ARG_6; + } + if (aord_num <= 0) { return QMCKL_INVALID_ARG_7; } + if (aord_vector == NULL) { + return QMCKL_INVALID_ARG_8; + } + + if (en_distance_rescaled == NULL) { + return QMCKL_INVALID_ARG_9; + } + + if (factor_en == NULL) { + return QMCKL_INVALID_ARG_10; + } + for (int nw = 0; nw < walk_num; ++nw ) { // init array @@ -2826,7 +2843,7 @@ integer function qmckl_compute_factor_en_deriv_e_f(context, walk_num, elec_num, double precision , intent(out) :: factor_en_deriv_e(elec_num,4,walk_num) integer*8 :: i, a, p, ipar, nw, ii - double precision :: x, spin_fact, den, invden, invden2, invden3, xinv + double precision :: x, den, invden, invden2, invden3, xinv double precision :: y, lap1, lap2, lap3, third double precision, dimension(3) :: power_ser_g double precision, dimension(4) :: dx @@ -5264,7 +5281,7 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_tmp_c_f(context, cord_num, elec_num, nucl_num, & +integer function qmckl_compute_tmp_c_doc_f(context, cord_num, elec_num, nucl_num, & walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & result(info) use qmckl @@ -5319,7 +5336,7 @@ integer function qmckl_compute_tmp_c_f(context, cord_num, elec_num, nucl_num, & do nw=1, walk_num do i=0, cord_num-1 - info = qmckl_dgemm(context,TransA, TransB, M, N, K, alpha, & + info = qmckl_dgemm(context, TransA, TransB, M, N, K, alpha, & een_rescaled_e(1,1,i,nw),LDA*1_8, & een_rescaled_n(1,1,0,nw),LDB*1_8, & beta, & @@ -5327,11 +5344,39 @@ integer function qmckl_compute_tmp_c_f(context, cord_num, elec_num, nucl_num, & end do end do -end function qmckl_compute_tmp_c_f +end function qmckl_compute_tmp_c_doc_f #+end_src +#+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") + +#+RESULTS: +#+begin_src f90 :tangle (eval f) :comments org :exports none +integer(c_int32_t) function qmckl_compute_tmp_c_doc & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: cord_num + integer (c_int64_t) , intent(in) , value :: elec_num + integer (c_int64_t) , intent(in) , value :: nucl_num + integer (c_int64_t) , intent(in) , value :: walk_num + real (c_double ) , intent(in) :: een_rescaled_e(elec_num,elec_num,0:cord_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) + real (c_double ) , intent(out) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) + + integer(c_int32_t), external :: qmckl_compute_tmp_c_doc_f + info = qmckl_compute_tmp_c_doc_f & + (context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c) + +end function qmckl_compute_tmp_c_doc +#+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes -qmckl_exit_code qmckl_compute_tmp_c ( +qmckl_exit_code qmckl_compute_tmp_c_hpc ( const qmckl_context context, const int64_t cord_num, const int64_t elec_num, @@ -5341,17 +5386,6 @@ qmckl_exit_code qmckl_compute_tmp_c ( const double* een_rescaled_n, double* const tmp_c ) { - qmckl_exit_code info; - int i, j, a, l, kk, p, lmax, nw; - char TransA, TransB; - double alpha, beta; - int M, N, K, LDA, LDB, LDC; - - TransA = 'N'; - TransB = 'N'; - alpha = 1.0; - beta = 0.0; - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5368,26 +5402,37 @@ qmckl_exit_code qmckl_compute_tmp_c ( return QMCKL_INVALID_ARG_4; } - M = elec_num; - N = nucl_num*(cord_num + 1); - K = elec_num; - - LDA = sizeof(een_rescaled_e)/sizeof(double); - LDB = sizeof(een_rescaled_n)/sizeof(double); - LDC = sizeof(tmp_c)/sizeof(double); + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_5; + } - for (int nw=0; nw < walk_num; ++nw) { - for (int i=0; i Date: Mon, 4 Apr 2022 17:30:38 +0200 Subject: [PATCH 028/100] Fixed documentation --- org/qmckl_electron.org | 28 ++++++++++++++-------------- org/qmckl_jastrow.org | 38 +++++++++++++++++++------------------- org/qmckl_point.org | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/org/qmckl_electron.org b/org/qmckl_electron.org index 33ac366..360d1bf 100644 --- a/org/qmckl_electron.org +++ b/org/qmckl_electron.org @@ -97,8 +97,8 @@ int main() { | ~ee_distance_rescaled_deriv_e_date~ | ~uint64_t~ | Last modification date of the electron-electron distance derivatives | | ~ee_pot~ | ~double[walk_num]~ | Electron-electron rescaled distances derivatives | | ~ee_pot_date~ | ~uint64_t~ | Last modification date of the electron-electron distance derivatives | - | ~en_pot~ | double[walk_num] | Electron-nucleus potential energy | - | ~en_pot_date~ | int64_t | Date when the electron-nucleus potential energy was computed | + | ~en_pot~ | ~double[walk_num]~ | Electron-nucleus potential energy | + | ~en_pot_date~ | ~int64_t~ | Date when the electron-nucleus potential energy was computed | | ~en_distance_rescaled~ | ~double[walk_num][nucl_num][num]~ | Electron-nucleus distances | | ~en_distance_rescaled_date~ | ~uint64_t~ | Last modification date of the electron-electron distances | | ~en_distance_rescaled_deriv_e~ | ~double[walk_num][4][nucl_num][num]~ | Electron-electron rescaled distances derivatives | @@ -1218,7 +1218,7 @@ qmckl_exit_code qmckl_provide_ee_distance_rescaled(qmckl_context context) | ~elec_num~ | ~int64_t~ | in | Number of electrons | | ~rescale_factor_kappa_ee~ | ~double~ | in | Factor to rescale ee distances | | ~walk_num~ | ~int64_t~ | in | Number of walkers | - | ~coord~ | ~double[walk_num][3][elec_num]~ | in | Electron coordinates | + | ~coord~ | ~double[3][walk_num][elec_num]~ | in | Electron coordinates | | ~ee_distance~ | ~double[walk_num][elec_num][elec_num]~ | out | Electron-electron rescaled distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -1231,7 +1231,7 @@ integer function qmckl_compute_ee_distance_rescaled_f(context, elec_num, rescale integer*8 , intent(in) :: elec_num double precision , intent(in) :: rescale_factor_kappa_ee integer*8 , intent(in) :: walk_num - double precision , intent(in) :: coord(elec_num,3,walk_num) + double precision , intent(in) :: coord(elec_num,walk_num,3) double precision , intent(out) :: ee_distance_rescaled(elec_num,elec_num,walk_num) integer*8 :: k @@ -1357,7 +1357,7 @@ assert(fabs(ee_distance_rescaled[elec_num*elec_num+1]-0.9985724058042633) < 1.e- #+end_src -** Electron-electron rescaled distance gradients and laplacian with respect to electron coords +** Electron-electron rescaled distance gradients and Laplacian with respect to electron coords The rescaled distances which is given as $R = (1 - \exp{-\kappa r})/\kappa$ needs to be perturbed with respect to the electorn coordinates. @@ -1464,7 +1464,7 @@ qmckl_exit_code qmckl_provide_ee_distance_rescaled_deriv_e(qmckl_context context | ~elec_num~ | ~int64_t~ | in | Number of electrons | | ~rescale_factor_kappa_ee~ | ~double~ | in | Factor to rescale ee distances | | ~walk_num~ | ~int64_t~ | in | Number of walkers | - | ~coord~ | ~double[walk_num][3][elec_num]~ | in | Electron coordinates | + | ~coord~ | ~double[3][walk_num][elec_num]~ | in | Electron coordinates | | ~ee_distance_deriv_e~ | ~double[walk_num][4][elec_num][elec_num]~ | out | Electron-electron rescaled distance derivatives | #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -1477,7 +1477,7 @@ integer function qmckl_compute_ee_distance_rescaled_deriv_e_f(context, elec_num, integer*8 , intent(in) :: elec_num double precision , intent(in) :: rescale_factor_kappa_ee integer*8 , intent(in) :: walk_num - double precision , intent(in) :: coord(elec_num,3,walk_num) + double precision , intent(in) :: coord(elec_num,walk_num,3) double precision , intent(out) :: ee_distance_rescaled_deriv_e(4,elec_num,elec_num,walk_num) integer*8 :: k @@ -1501,8 +1501,8 @@ integer function qmckl_compute_ee_distance_rescaled_deriv_e_f(context, elec_num, do k=1,walk_num info = qmckl_distance_rescaled_deriv_e(context, 'T', 'T', elec_num, elec_num, & - coord(1,1,k), elec_num, & - coord(1,1,k), elec_num, & + coord(1,k,1), elec_num*walk_num, & + coord(1,k,1), elec_num*walk_num, & ee_distance_rescaled_deriv_e(1,1,1,k), elec_num, rescale_factor_kappa_ee) if (info /= QMCKL_SUCCESS) then exit @@ -1905,7 +1905,7 @@ qmckl_exit_code qmckl_provide_en_distance(qmckl_context context) | ~elec_num~ | ~int64_t~ | in | Number of electrons | | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | | ~walk_num~ | ~int64_t~ | in | Number of walkers | - | ~elec_coord~ | ~double[walk_num][3][elec_num]~ | in | Electron coordinates | + | ~elec_coord~ | ~double[3][walk_num][elec_num]~ | in | Electron coordinates | | ~nucl_coord~ | ~double[3][elec_num]~ | in | Nuclear coordinates | | ~en_distance~ | ~double[walk_num][nucl_num][elec_num]~ | out | Electron-nucleus distances | @@ -2183,7 +2183,7 @@ qmckl_exit_code qmckl_provide_en_distance_rescaled(qmckl_context context) | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | | ~rescale_factor_kappa_en~ | ~double~ | in | The factor for rescaled distances | | ~walk_num~ | ~int64_t~ | in | Number of walkers | - | ~elec_coord~ | ~double[walk_num][3][elec_num]~ | in | Electron coordinates | + | ~elec_coord~ | ~double[3][walk_num][elec_num]~ | in | Electron coordinates | | ~nucl_coord~ | ~double[3][elec_num]~ | in | Nuclear coordinates | | ~en_distance_rescaled~ | ~double[walk_num][nucl_num][elec_num]~ | out | Electron-nucleus distances | @@ -2471,9 +2471,9 @@ qmckl_exit_code qmckl_provide_en_distance_rescaled_deriv_e(qmckl_context context | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | | ~rescale_factor_kappa_en~ | ~double~ | in | The factor for rescaled distances | | ~walk_num~ | ~int64_t~ | in | Number of walkers | - | ~elec_coord~ | ~double[walk_num][3][elec_num]~ | in | Electron coordinates | + | ~elec_coord~ | ~double[3][walk_num][elec_num]~ | in | Electron coordinates | | ~nucl_coord~ | ~double[3][elec_num]~ | in | Nuclear coordinates | - | ~en_distance_rescaled_deriv_e~ | ~double[walk_num][4][nucl_num][elec_num]~ | out | Electron-nucleus distance derivatives | + | ~en_distance_rescaled_deriv_e~ | ~double[walk_num][nucl_num][elec_num][4]~ | out | Electron-nucleus distance derivatives | #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_en_distance_rescaled_deriv_e_f(context, elec_num, nucl_num, & @@ -2843,7 +2843,7 @@ assert (rc == QMCKL_SUCCESS); *** Compute :noexport: - # begin_src f90 :comments org :tangle (eval f) :noweb yes + # begin_src f90 :comments org :tangle (eval f) :noweb yes subroutine draw_init_points implicit none BEGIN_DOC diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index d4eaaa8..ff94f28 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -154,25 +154,25 @@ int main() { computed data: - | Variable | Type | In/Out | Description | - |----------------------------+---------------------------------------------------------------------+-------------------------------------------------+---------------------------------| - | ~dim_cord_vect~ | ~int64_t~ | Number of unique C coefficients | | - | ~dim_cord_vect_date~ | ~uint64_t~ | Number of unique C coefficients | | - | ~asymp_jasb~ | ~double[2]~ | Asymptotic component | | - | ~asymp_jasb_date~ | ~uint64_t~ | Asymptotic component | | - | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | vector of non-zero coefficients | | - | ~cord_vect_full_date~ | ~uint64_t~ | Keep track of changes here | | - | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | Transform l,k,p, and m into consecutive indices | | - | ~lkpm_combined_index_date~ | ~uint64_t~ | Transform l,k,p, and m into consecutive indices | | - | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | - | ~dtmp_c~ | ~double[walk_num][elec_num][4][nucl_num][0:cord_num][0:cord_num-1]~ | vector of non-zero coefficients | | + | Variable | Type | In/Out | + |----------------------------+-----------------------------------------------------------------+-------------------------------------------------| + | ~dim_cord_vect~ | ~int64_t~ | Number of unique C coefficients | + | ~dim_cord_vect_date~ | ~uint64_t~ | Number of unique C coefficients | + | ~asymp_jasb~ | ~double[2]~ | Asymptotic component | + | ~asymp_jasb_date~ | ~uint64_t~ | Asymptotic component | + | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | vector of non-zero coefficients | + | ~cord_vect_full_date~ | ~uint64_t~ | Keep track of changes here | + | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | Transform l,k,p, and m into consecutive indices | + | ~lkpm_combined_index_date~ | ~uint64_t~ | Transform l,k,p, and m into consecutive indices | + | ~tmp_c~ | ~double[walk_num][cord_num][cord_num+1][nucl_num][elec_num]~ | vector of non-zero coefficients | + | ~dtmp_c~ | ~double[walk_num][elec_num][4][nucl_num][cord_num+1][cord_num]~ | vector of non-zero coefficients | - | ~een_rescaled_n~ | ~double[walk_num][elec_num][nucl_num][0:cord_num]~ | The electron-electron rescaled distances raised to the powers defined by cord | | - | ~een_rescaled_n_date~ | ~uint64_t~ | Keep track of the date of creation | | - | ~een_rescaled_e_deriv_e~ | ~double[walk_num][elec_num][4][elec_num][0:cord_num]~ | The electron-electron rescaled distances raised to the powers defined by cord derivatives wrt electrons | | - | ~een_rescaled_e_deriv_e_date~ | ~uint64_t~ | Keep track of the date of creation | | - | ~een_rescaled_n_deriv_e~ | ~double[walk_num][elec_num][4][nucl_num][0:cord_num]~ | The electron-electron rescaled distances raised to the powers defined by cord derivatives wrt electrons | | - | ~een_rescaled_n_deriv_e_date~ | ~uint64_t~ | Keep track of the date of creation | | + | ~een_rescaled_n~ | ~double[walk_num][cord_num+1][nucl_num][elec_num]~ | The electron-electron rescaled distances raised to the powers defined by cord | + | ~een_rescaled_n_date~ | ~uint64_t~ | Keep track of the date of creation | + | ~een_rescaled_e_deriv_e~ | ~double[walk_num][cord_num+1][elec_num][4][elec_num]~ | The electron-electron rescaled distances raised to the powers defined by cord derivatives wrt electrons | + | ~een_rescaled_e_deriv_e_date~ | ~uint64_t~ | Keep track of the date of creation | + | ~een_rescaled_n_deriv_e~ | ~double[walk_num][cord_num+1][nucl_num][4][elec_num]~ | The electron-electron rescaled distances raised to the powers defined by cord derivatives wrt electrons | + | ~een_rescaled_n_deriv_e_date~ | ~uint64_t~ | Keep track of the date of creation | #+NAME: jastrow_data #+BEGIN_SRC python :results none :exports none @@ -5731,7 +5731,7 @@ qmckl_exit_code qmckl_compute_dtmp_c (const qmckl_context context, #+end_src #+begin_src c :tangle (eval h_private_func) :comments org - qmckl_exit_code qmckl_compute_dtmp_c_hpc ( +qmckl_exit_code qmckl_compute_dtmp_c_hpc ( const qmckl_context context, const int64_t cord_num, const int64_t elec_num, diff --git a/org/qmckl_point.org b/org/qmckl_point.org index 07b0863..e9ce0b7 100644 --- a/org/qmckl_point.org +++ b/org/qmckl_point.org @@ -81,7 +81,7 @@ int main() { |----------+----------------+-------------------------------------------| | ~num~ | ~int64_t~ | Total number of points | | ~date~ | ~uint64_t~ | Last modification date of the coordinates | - | ~coord~ | ~qmckl_matrix~ | ~num~ \times 3 matrix3 | + | ~coord~ | ~qmckl_matrix~ | ~num~ \times 3 matrix | We consider that the matrix is stored 'transposed' and 'normal' corresponds to the 3 \times ~num~ matrix. From bcdbc49d5fdbabbb493a7cb140586e3a1372a07f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 4 Apr 2022 23:53:58 +0200 Subject: [PATCH 029/100] Cleaning --- org/qmckl_jastrow.org | 91 +++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ff94f28..659f23a 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -2058,9 +2058,10 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) | ~factor_ee_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Electron-electron distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_ee_deriv_e_f(context, walk_num, elec_num, up_num, bord_num, & - bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, & - asymp_jasb, factor_ee_deriv_e) & +integer function qmckl_compute_factor_ee_deriv_e_f( & + context, walk_num, elec_num, up_num, bord_num, & + bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, & + asymp_jasb, factor_ee_deriv_e) & result(info) use qmckl implicit none @@ -2068,7 +2069,7 @@ integer function qmckl_compute_factor_ee_deriv_e_f(context, walk_num, elec_num, integer*8 , intent(in) :: walk_num, elec_num, bord_num, up_num double precision , intent(in) :: bord_vector(bord_num + 1) double precision , intent(in) :: ee_distance_rescaled(elec_num, elec_num,walk_num) - double precision , intent(in) :: ee_distance_rescaled_deriv_e(4,elec_num, elec_num,walk_num) + double precision , intent(in) :: ee_distance_rescaled_deriv_e(4,elec_num, elec_num,walk_num) !TODO double precision , intent(in) :: asymp_jasb(2) double precision , intent(out) :: factor_ee_deriv_e(elec_num,4,walk_num) @@ -2183,7 +2184,7 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & +integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & (context, & walk_num, & elec_num, & @@ -2480,9 +2481,10 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) | ~factor_en~ | ~double[walk_num]~ | out | Electron-nucleus jastrow | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_en_f(context, walk_num, elec_num, nucl_num, type_nucl_num, & - type_nucl_vector, aord_num, aord_vector, & - en_distance_rescaled, factor_en) & +integer function qmckl_compute_factor_en_f( & + context, walk_num, elec_num, nucl_num, type_nucl_num, & + type_nucl_vector, aord_num, aord_vector, & + en_distance_rescaled, factor_en) & result(info) use qmckl implicit none @@ -2828,9 +2830,10 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context) | ~factor_en_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Electron-nucleus jastrow | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_en_deriv_e_f(context, walk_num, elec_num, nucl_num, type_nucl_num, & - type_nucl_vector, aord_num, aord_vector, & - en_distance_rescaled, en_distance_rescaled_deriv_e, factor_en_deriv_e) & +integer function qmckl_compute_factor_en_deriv_e_f( & + context, walk_num, elec_num, nucl_num, type_nucl_num, & + type_nucl_vector, aord_num, aord_vector, & + en_distance_rescaled, en_distance_rescaled_deriv_e, factor_en_deriv_e) & result(info) use qmckl implicit none @@ -2910,7 +2913,7 @@ integer function qmckl_compute_factor_en_deriv_e_f(context, walk_num, elec_num, lap3 = lap3 - 2.0d0 * aord_vector(2, type_nucl_vector(a)) * dx(ii) * dx(ii) factor_en_deriv_e(i, ii, nw) = factor_en_deriv_e(i, ii, nw) + aord_vector(1, type_nucl_vector(a)) & - * dx(ii) * invden2 & + ,* dx(ii) * invden2 & + power_ser_g(ii) end do @@ -3238,7 +3241,8 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) | ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | out | Electron-electron rescaled distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_een_rescaled_e_f(context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & +integer function qmckl_compute_een_rescaled_e_f( & + context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & ee_distance, een_rescaled_e) & result(info) use qmckl @@ -3343,7 +3347,8 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_een_rescaled_e & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) & + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & + ee_distance, een_rescaled_e) & bind(C) result(info) use, intrinsic :: iso_c_binding @@ -3574,7 +3579,8 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) | ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | out | Electron-electron rescaled distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_een_rescaled_e_deriv_e_f(context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & +integer function qmckl_compute_factor_een_rescaled_e_deriv_e_f( & + context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & coord_new, ee_distance, een_rescaled_e, een_rescaled_e_deriv_e) & result(info) use qmckl @@ -3951,7 +3957,8 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context) | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | out | Electron-nucleus rescaled distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_een_rescaled_n_f(context, walk_num, elec_num, nucl_num, cord_num, rescale_factor_kappa_en, & +integer function qmckl_compute_een_rescaled_n_f( & + context, walk_num, elec_num, nucl_num, cord_num, rescale_factor_kappa_en, & en_distance, een_rescaled_n) & result(info) use qmckl @@ -4295,9 +4302,10 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context) | ~een_rescaled_n_deriv_e~ | ~double[walk_num][0:cord_num][nucl_num][4][elec_num]~ | out | Electron-nucleus rescaled distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_een_rescaled_n_deriv_e_f(context, walk_num, elec_num, nucl_num, & - cord_num, rescale_factor_kappa_en, & - coord_new, coord, en_distance, een_rescaled_n, een_rescaled_n_deriv_e) & +integer function qmckl_compute_factor_een_rescaled_n_deriv_e_f( & + context, walk_num, elec_num, nucl_num, & + cord_num, rescale_factor_kappa_en, & + coord_new, coord, en_distance, een_rescaled_n, een_rescaled_n_deriv_e) & result(info) use qmckl implicit none @@ -4938,7 +4946,8 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) | ~dim_cord_vect~ | ~int64_t~ | out | dimension of cord_vect_full table | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_dim_cord_vect_f(context, cord_num, dim_cord_vect) & +integer function qmckl_compute_dim_cord_vect_f( & + context, cord_num, dim_cord_vect) & result(info) use qmckl implicit none @@ -5046,7 +5055,8 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | out | Full list of coefficients | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_cord_vect_full_f(context, nucl_num, dim_cord_vect, type_nucl_num, & +integer function qmckl_compute_cord_vect_full_f( & + context, nucl_num, dim_cord_vect, type_nucl_num, & type_nucl_vector, cord_vector, cord_vect_full) & result(info) use qmckl @@ -5148,8 +5158,8 @@ end function qmckl_compute_cord_vect_full_f | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | out | Full list of combined indices | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_lkpm_combined_index_f(context, cord_num, dim_cord_vect, & - lkpm_combined_index) & +integer function qmckl_compute_lkpm_combined_index_f( & + context, cord_num, dim_cord_vect, lkpm_combined_index) & result(info) use qmckl implicit none @@ -5281,7 +5291,8 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_tmp_c_doc_f(context, cord_num, elec_num, nucl_num, & +integer function qmckl_compute_tmp_c_doc_f( & + context, cord_num, elec_num, nucl_num, & walk_num, een_rescaled_e, een_rescaled_n, tmp_c) & result(info) use qmckl @@ -5525,7 +5536,8 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_dtmp_c_doc_f(context, cord_num, elec_num, nucl_num, & +integer function qmckl_compute_dtmp_c_doc_f( & + context, cord_num, elec_num, nucl_num, & walk_num, een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) & result(info) use qmckl @@ -5975,9 +5987,10 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context) | ~factor_een~ | ~double[walk_num]~ | out | Electron-nucleus jastrow | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_een_naive_f(context, walk_num, elec_num, nucl_num, cord_num,& - dim_cord_vect, cord_vect_full, lkpm_combined_index, & - een_rescaled_e, een_rescaled_n, factor_een) & +integer function qmckl_compute_factor_een_naive_f( & + context, walk_num, elec_num, nucl_num, cord_num,& + dim_cord_vect, cord_vect_full, lkpm_combined_index, & + een_rescaled_e, een_rescaled_n, factor_een) & result(info) use qmckl implicit none @@ -6142,9 +6155,10 @@ end function qmckl_compute_factor_een_naive_f | ~factor_een~ | ~double[walk_num]~ | out | Electron-nucleus jastrow | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_een_f(context, walk_num, elec_num, nucl_num, cord_num, & - dim_cord_vect, cord_vect_full, lkpm_combined_index, & - tmp_c, een_rescaled_n, factor_een) & +integer function qmckl_compute_factor_een_f( & + context, walk_num, elec_num, nucl_num, cord_num, & + dim_cord_vect, cord_vect_full, lkpm_combined_index, & + tmp_c, een_rescaled_n, factor_een) & result(info) use qmckl implicit none @@ -6490,10 +6504,10 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) | ~factor_een_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Electron-nucleus jastrow | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_een_deriv_e_naive_f(context, walk_num, elec_num, nucl_num, cord_num, & - dim_cord_vect, cord_vect_full, lkpm_combined_index, & - een_rescaled_e, een_rescaled_n, & - een_rescaled_e_deriv_e, een_rescaled_n_deriv_e, factor_een_deriv_e)& +integer function qmckl_compute_factor_een_deriv_e_naive_f( & + context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & + cord_vect_full, lkpm_combined_index, een_rescaled_e, een_rescaled_n, & + een_rescaled_e_deriv_e, een_rescaled_n_deriv_e, factor_een_deriv_e)& result(info) use qmckl implicit none @@ -6689,9 +6703,10 @@ end function qmckl_compute_factor_een_deriv_e_naive_f #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_een_deriv_e_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & - cord_vect_full, lkpm_combined_index, & - tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & +integer function qmckl_compute_factor_een_deriv_e_f( & + context, walk_num, elec_num, nucl_num, & + cord_num, dim_cord_vect, cord_vect_full, lkpm_combined_index, & + tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e)& result(info) use qmckl implicit none From 511eba58430cc1e448f64fc83ab63c15e6b1eb71 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 09:56:13 +0200 Subject: [PATCH 030/100] Fixed dgemm bug --- org/qmckl_jastrow.org | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 659f23a..5d600f6 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5432,18 +5432,19 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( const int64_t LDB = elec_num; const int64_t LDC = elec_num; + const int64_t af = elec_num*elec_num; + const int64_t bf = elec_num*nucl_num*(cord_num+1); + const int64_t cf = bf; + for (int64_t nw=0; nw < walk_num; ++nw) { for (int64_t i=0; i Date: Tue, 5 Apr 2022 10:07:25 +0200 Subject: [PATCH 031/100] Fix flag for nvc and nvfortran --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index c746a41..fe54fc2 100644 --- a/configure.ac +++ b/configure.ac @@ -201,7 +201,7 @@ AS_IF([test "$BLAS_LIBS" == "$LAPACK_LIBS"], [BLAS_LIBS=""]) case $FC in *ifort*) - FCFLAGS="$FCFLAGS -nofor-main -mp -target=gpu" + FCFLAGS="$FCFLAGS -nofor-main" ;; *nvfortran*) @@ -213,7 +213,7 @@ esac case $CC in *nvc*) - CFLAGS="$CFLAGS -fPIC" + CFLAGS="$CFLAGS -fPIC -mp -target=gpu" ;; esac From 98097e8fa738011f5bc6e156968a82c28e20417e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Tue, 5 Apr 2022 11:02:08 +0200 Subject: [PATCH 032/100] Convert GPU implementations to C TODO : Fix naive implementation which seems to be incorrect (probably an issue with indexing) --- org/qmckl_jastrow.org | 1488 +++++++++++++++++++---------------------- 1 file changed, 683 insertions(+), 805 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index e1a7a18..8736c0b 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -151,6 +151,7 @@ int main() { | ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative | | ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part | | ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative | + | ~offload_type~ | ~qmckl_jastrow_offload_type~ | in | Enum type to change offload type at runtime | computed data: @@ -335,7 +336,7 @@ typedef enum qmckl_jastrow_offload_type{ } qmckl_jastrow_offload_type; #+end_src -#+begin_src c :comments org :tangle (eval h_private_type) + #+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_jastrow_struct{ int32_t uninitialized; int64_t aord_num; @@ -1829,6 +1830,72 @@ integer function qmckl_compute_factor_ee_f(context, walk_num, elec_num, up_num, end function qmckl_compute_factor_ee_f #+end_src +#+begin_src c :comments org :tangle (eval c) :noweb yes + qmckl_exit_code qmckl_compute_factor_ee ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ) { + + int ipar; // can we use a smaller integer? + double pow_ser, x, x1, spin_fact, power_ser; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (bord_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + for (int nw = 0; nw < walk_num; ++nw) { + factor_ee[nw] = 0.0; // put init array here. + for (int i = 0; i < elec_num; ++i ) { + for (int j = 0; j < i; ++j) { + //x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + x = ee_distance_rescaled[j + i * elec_num + nw*(elec_num * elec_num)]; + x1 = x; + power_ser = 0.0; + spin_fact = 1.0; + ipar = 0; // index of asymp_jasb + + for (int p = 1; p < bord_num; ++p) { + x = x * x1; + power_ser = power_ser + bord_vector[p + 1] * x; + } + + if(i < up_num || j >= up_num) { + spin_fact = 0.5; + ipar = 1; + } + + factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ + x1 / \ + (1.0 + bord_vector[1] * \ + x1) \ + -asymp_jasb[ipar] + power_ser; + + } + } + } + + return QMCKL_SUCCESS; +} +#+end_src + #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -1846,49 +1913,7 @@ end function qmckl_compute_factor_ee_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_ee & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - asymp_jasb, & - factor_ee) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: up_num - integer (c_int64_t) , intent(in) , value :: bord_num - real (c_double ) , intent(in) :: bord_vector(bord_num + 1) - real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) - real (c_double ) , intent(in) :: asymp_jasb(2) - real (c_double ) , intent(out) :: factor_ee(walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_ee_f - info = qmckl_compute_factor_ee_f & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - asymp_jasb, & - factor_ee) - - end function qmckl_compute_factor_ee - #+end_src *** Test #+begin_src python :results output :exports none :noweb yes @@ -2569,6 +2594,74 @@ integer function qmckl_compute_factor_en_f(context, walk_num, elec_num, nucl_num end function qmckl_compute_factor_en_f #+end_src + + + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_factor_en ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ) { + + + int ipar; + double x, x1, spin_fact, power_ser; + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (aord_num <= 0) { + return QMCKL_INVALID_ARG_7; + } + + + for (int nw = 0; nw < walk_num; ++nw ) { + // init array + factor_en[nw] = 0.0; + for (int a = 0; a < nucl_num; ++a ) { + for (int i = 0; i < elec_num; ++i ) { + // x = ee_distance_rescaled[j * (walk_num * elec_num) + i * (walk_num) + nw]; + x = en_distance_rescaled[i + a * elec_num + nw * (elec_num * nucl_num)]; + x1 = x; + power_ser = 0.0; + + for (int p = 2; p < aord_num+1; ++p) { + x = x * x1; + power_ser = power_ser + aord_vector[(p+1)-1 + (type_nucl_vector[a]-1) * aord_num] * x; + } + + factor_en[nw] = factor_en[nw] + aord_vector[0 + (type_nucl_vector[a]-1)*aord_num] * x1 / \ + (1.0 + aord_vector[1 + (type_nucl_vector[a]-1) * aord_num] * x1) + \ + power_ser; + + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -2588,53 +2681,6 @@ end function qmckl_compute_factor_en_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_en & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - factor_en) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: type_nucl_num - integer (c_int64_t) , intent(in) :: type_nucl_vector(nucl_num) - integer (c_int64_t) , intent(in) , value :: aord_num - real (c_double ) , intent(in) :: aord_vector(aord_num + 1, type_nucl_num) - real (c_double ) , intent(in) :: en_distance_rescaled(elec_num, nucl_num, walk_num) - real (c_double ) , intent(out) :: factor_en(walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_en_f - info = qmckl_compute_factor_en_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - factor_en) - - end function qmckl_compute_factor_en - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np @@ -4002,6 +4048,70 @@ integer function qmckl_compute_een_rescaled_n_f(context, walk_num, elec_num, nuc end function qmckl_compute_een_rescaled_n_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_een_rescaled_n ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ) { + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_5; + } + + // Prepare table of exponentiated distances raised to appropriate power + for (int i = 0; i < (walk_num*(cord_num+1)*nucl_num*elec_num); ++i) { + een_rescaled_n[i] = 17.0; + } + + for (int nw = 0; nw < walk_num; ++nw) { + for (int a = 0; a < nucl_num; ++a) { + for (int i = 0; i < elec_num; ++i) { + // prepare the actual een table + //een_rescaled_n(:, :, 0, nw) = 1.0d0 + een_rescaled_n[i + a * elec_num + 0 + nw * elec_num*nucl_num*(cord_num+1)] = 1.0; + //een_rescaled_n(i, a, 1, nw) = dexp(-rescale_factor_kappa_en * en_distance(i, a, nw)) + een_rescaled_n[i + a*elec_num + elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] = exp(-rescale_factor_kappa_en * \ + en_distance[i + a*elec_num + nw*elec_num*nucl_num]); + } + } + + for (int l = 2; l < (cord_num+1); ++l){ + for (int a = 0; a < nucl_num; ++a) { + for (int i = 0; i < elec_num; ++i) { + een_rescaled_n[i + a*elec_num + l*elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] = een_rescaled_n[i + a*elec_num + (l-1)*elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)] *\ + een_rescaled_n[i + a*elec_num + elec_num*nucl_num + nw*elec_num*nucl_num*(cord_num+1)]; + } + } + } + + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -4017,47 +4127,6 @@ end function qmckl_compute_een_rescaled_n_f double* const een_rescaled_n ); #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_een_rescaled_n & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - en_distance, & - een_rescaled_n) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: cord_num - real (c_double ) , intent(in) , value :: rescale_factor_kappa_en - real (c_double ) , intent(in) :: en_distance(nucl_num,elec_num,walk_num) - real (c_double ) , intent(out) :: een_rescaled_n(nucl_num,elec_num,0:cord_num,walk_num) - - integer(c_int32_t), external :: qmckl_compute_een_rescaled_n_f - info = qmckl_compute_een_rescaled_n_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - en_distance, & - een_rescaled_n) - - end function qmckl_compute_een_rescaled_n - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes @@ -4116,7 +4185,6 @@ assert(fabs(een_rescaled_n[0][1][0][4]-0.023391817607642338) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][3]-0.880957224822116) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][4]-0.027185942659395074) < 1.e-12); assert(fabs(een_rescaled_n[0][2][1][5]-0.01343938025140174) < 1.e-12); - #+end_src ** Electron-nucleus rescaled distances for each order and derivatives @@ -4779,7 +4847,6 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context) qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) { - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; } @@ -4824,6 +4891,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); + default_compute = false; if (rc != QMCKL_SUCCESS) { return rc; } @@ -4841,6 +4909,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); + default_compute = false; if (rc != QMCKL_SUCCESS) { return rc; } @@ -4871,7 +4940,6 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) { - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; } @@ -5020,6 +5088,43 @@ integer function qmckl_compute_dim_cord_vect_f(context, cord_num, dim_cord_vect) end function qmckl_compute_dim_cord_vect_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_dim_cord_vect ( + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect){ + + int lmax; + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + *dim_cord_vect = 0; + + for (int p=2; p <= cord_num; ++p){ + for (int k=p-1; k >= 0; --k) { + if (k != 0) { + lmax = p - k; + } else { + lmax = p - k - 2; + } + for (int l = lmax; l >= 0; --l) { + if ( ((p - k - l) & 1)==1) continue; + *dim_cord_vect=*dim_cord_vect+1; + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -5031,28 +5136,6 @@ end function qmckl_compute_dim_cord_vect_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_dim_cord_vect & - (context, cord_num, dim_cord_vect) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(out) :: dim_cord_vect - - integer(c_int32_t), external :: qmckl_compute_dim_cord_vect_f - info = qmckl_compute_dim_cord_vect_f & - (context, cord_num, dim_cord_vect) - - end function qmckl_compute_dim_cord_vect - #+end_src - *** Compute cord_vect_full :PROPERTIES: :Name: qmckl_compute_cord_vect_full @@ -5171,7 +5254,7 @@ end function qmckl_compute_cord_vect_full_f | ~context~ | ~qmckl_context~ | in | Global state | | ~cord_num~ | ~int64_t~ | in | Order of polynomials | | ~dim_cord_vect~ | ~int64_t~ | in | dimension of cord full table | - | ~lpkm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | out | Full list of combined indices | + | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | out | Full list of combined indices | #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_lkpm_combined_index_f(context, cord_num, dim_cord_vect, & @@ -5227,6 +5310,53 @@ integer function qmckl_compute_lkpm_combined_index_f(context, cord_num, dim_cord end function qmckl_compute_lkpm_combined_index_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_lkpm_combined_index ( + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ) { + + int kk, lmax, m; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (dim_cord_vect <= 0) { + return QMCKL_INVALID_ARG_3; + } + +/* +*/ + kk = 0; + for (int p = 2; p <= cord_num; ++p) { + for (int k=(p-1); k >= 0; --k) { + if (k != 0) { + lmax = p - k; + } else { + lmax = p - k - 2; + } + for (int l=lmax; l >= 0; --l) { + if (((p - k - l) & 1) == 1) continue; + m = (p - k - l)/2; + lkpm_combined_index[kk ] = l; + lkpm_combined_index[kk + dim_cord_vect] = k; + lkpm_combined_index[kk + 2*dim_cord_vect] = p; + lkpm_combined_index[kk + 3*dim_cord_vect] = m; + kk = kk + 1; + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: @@ -5235,32 +5365,10 @@ end function qmckl_compute_lkpm_combined_index_f const qmckl_context context, const int64_t cord_num, const int64_t dim_cord_vect, - int64_t* const lpkm_combined_index ); + int64_t* const lkpm_combined_index ); #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_lkpm_combined_index & - (context, cord_num, dim_cord_vect, lpkm_combined_index) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(in) , value :: dim_cord_vect - integer (c_int64_t) , intent(out) :: lpkm_combined_index(dim_cord_vect,4) - - integer(c_int32_t), external :: qmckl_compute_lkpm_combined_index_f - info = qmckl_compute_lkpm_combined_index_f & - (context, cord_num, dim_cord_vect, lpkm_combined_index) - - end function qmckl_compute_lkpm_combined_index - #+end_src *** Compute tmp_c :PROPERTIES: @@ -5348,6 +5456,73 @@ integer function qmckl_compute_tmp_c_f(context, cord_num, elec_num, nucl_num, & end function qmckl_compute_tmp_c_f #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { + + qmckl_exit_code info; + int i, j, a, l, kk, p, lmax, nw; + char TransA, TransB; + double alpha, beta; + int M, N, K, LDA, LDB, LDC; + + TransA = 'N'; + TransB = 'N'; + alpha = 1.0; + beta = 0.0; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + M = elec_num; + N = nucl_num*(cord_num + 1); + K = elec_num; + + LDA = sizeof(een_rescaled_e)/sizeof(double); + LDB = sizeof(een_rescaled_n)/sizeof(double); + LDC = sizeof(tmp_c)/sizeof(double); + + for (int nw=0; nw < walk_num; ++nw) { + for (int i=0; ijastrow.factor_een_deriv_e = factor_een_deriv_e; } - /* Choose the correct compute function (depending on offload type) */ - bool default_compute = true; - - #ifdef HAVE_OPENACC_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { - qmckl_exit_code rc = - // CPU version - qmckl_compute_factor_een_deriv_e(context, - - // GPU version : No speedup on this kernel yet - // qmckl_compute_factor_een_deriv_e_acc_offload(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.dtmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e, - ctx->jastrow.factor_een_deriv_e); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } - } - #endif - - if(default_compute) { qmckl_exit_code rc = qmckl_compute_factor_een_deriv_e(context, ctx->electron.walk_num, @@ -6779,9 +6858,8 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.een_rescaled_n_deriv_e, ctx->jastrow.factor_een_deriv_e); - if (rc != QMCKL_SUCCESS) { - return rc; - } + if (rc != QMCKL_SUCCESS) { + return rc; } ctx->jastrow.factor_een_deriv_e_date = ctx->date; @@ -7180,206 +7258,6 @@ end function qmckl_compute_factor_een_deriv_e_f end function qmckl_compute_factor_een_deriv_e #+end_src -*** Compute (OpenACC offload) - :PROPERTIES: - :Name: qmckl_compute_factor_een_deriv_e - :CRetType: qmckl_exit_code - :FRetType: qmckl_exit_code - :END: - - #+NAME: qmckl_factor_een_deriv_e_acc_offload_args - | Variable | Type | In/Out | Description | - |--------------------------+---------------------------------------------------------------------+--------+------------------------------------------------| - | ~context~ | ~qmckl_context~ | in | Global state | - | ~walk_num~ | ~int64_t~ | in | Number of walkers | - | ~elec_num~ | ~int64_t~ | in | Number of electrons | - | ~nucl_num~ | ~int64_t~ | in | Number of nucleii | - | ~cord_num~ | ~int64_t~ | in | order of polynomials | - | ~dim_cord_vect~ | ~int64_t~ | in | dimension of full coefficient vector | - | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | in | full coefficient vector | - | ~lkpm_combined_index~ | ~int64_t[4][dim_cord_vect]~ | in | combined indices | - | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | in | Temporary intermediate tensor | - | ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | in | vector of non-zero coefficients | - | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | - | ~een_rescaled_n_deriv_e~ | ~double[walk_num][0:cord_num][nucl_num][4][elec_num]~ | in | Derivative of Electron-nucleus rescaled factor | - | ~factor_een_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Derivative of Electron-nucleus jastrow | - - - #+begin_src f90 :comments org :tangle (eval f) :noweb yes -#ifdef HAVE_OPENACC_OFFLOAD -integer function qmckl_compute_factor_een_deriv_e_acc_offload_f(context, walk_num, elec_num, nucl_num, cord_num, dim_cord_vect, & - cord_vect_full, lkpm_combined_index, & - tmp_c, dtmp_c, een_rescaled_n, een_rescaled_n_deriv_e, factor_een_deriv_e) & - result(info) - use qmckl - implicit none - integer(qmckl_context), intent(in) :: context - integer*8 , intent(in) :: walk_num, elec_num, cord_num, nucl_num, dim_cord_vect - integer*8 , intent(in) :: lkpm_combined_index(dim_cord_vect,4) - double precision , intent(in) :: cord_vect_full(nucl_num, dim_cord_vect) - double precision , intent(in) :: tmp_c(elec_num, nucl_num,0:cord_num, 0:cord_num-1, walk_num) - double precision , intent(in) :: dtmp_c(elec_num, 4, nucl_num,0:cord_num, 0:cord_num-1, walk_num) - double precision , intent(in) :: een_rescaled_n(elec_num, nucl_num, 0:cord_num, walk_num) - double precision , intent(in) :: een_rescaled_n_deriv_e(elec_num, 4, nucl_num, 0:cord_num, walk_num) - double precision , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) - - integer*8 :: i, a, j, l, k, p, m, n, nw, ii - double precision :: accu, accu2, cn - - info = QMCKL_SUCCESS - - if (context == QMCKL_NULL_CONTEXT) then - info = QMCKL_INVALID_CONTEXT - return - endif - - if (walk_num <= 0) then - info = QMCKL_INVALID_ARG_2 - return - endif - - if (elec_num <= 0) then - info = QMCKL_INVALID_ARG_3 - return - endif - - if (nucl_num <= 0) then - info = QMCKL_INVALID_ARG_4 - return - endif - - if (cord_num <= 0) then - info = QMCKL_INVALID_ARG_5 - return - endif - - factor_een_deriv_e = 0.0d0 - - !$acc parallel - do nw =1, walk_num - do n = 1, dim_cord_vect - l = lkpm_combined_index(n, 1) - k = lkpm_combined_index(n, 2) - p = lkpm_combined_index(n, 3) - m = lkpm_combined_index(n, 4) - - do a = 1, nucl_num - cn = cord_vect_full(a, n) - if(cn == 0.d0) cycle - - !$acc loop collapse(2) - do ii = 1, 4 - do j = 1, elec_num - factor_een_deriv_e(j,ii,nw) = factor_een_deriv_e(j,ii,nw) + (& - tmp_c(j,a,m,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m+l,nw) + & - (dtmp_c(j,ii,a,m,k,nw)) * een_rescaled_n(j,a,m+l,nw) + & - (dtmp_c(j,ii,a,m+l,k,nw)) * een_rescaled_n(j,a,m ,nw) + & - tmp_c(j,a,m+l,k,nw) * een_rescaled_n_deriv_e(j,ii,a,m,nw) & - ) * cn - end do - end do - - cn = cn + cn - - !$acc loop - do j = 1, elec_num - factor_een_deriv_e(j,4,nw) = factor_een_deriv_e(j,4,nw) + (& - (dtmp_c(j,1,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m+l,nw) + & - (dtmp_c(j,2,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m+l,nw) + & - (dtmp_c(j,3,a,m ,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m+l,nw) + & - (dtmp_c(j,1,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,1,a,m ,nw) + & - (dtmp_c(j,2,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,2,a,m ,nw) + & - (dtmp_c(j,3,a,m+l,k,nw)) * een_rescaled_n_deriv_e(j,3,a,m ,nw) & - ) * cn - end do - end do - end do - end do - !$acc end parallel -end function qmckl_compute_factor_een_deriv_e_acc_offload_f -#endif - #+end_src - - #+CALL: generate_c_header(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org -#ifdef HAVE_OPENACC_OFFLOAD - qmckl_exit_code qmckl_compute_factor_een_deriv_e_acc_offload ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const int64_t dim_cord_vect, - const double* cord_vect_full, - const int64_t* lkpm_combined_index, - const double* tmp_c, - const double* dtmp_c, - const double* een_rescaled_n, - const double* een_rescaled_n_deriv_e, - double* const factor_een_deriv_e ); -#endif - #+end_src - -#+CALL: generate_c_interface(table=qmckl_factor_een_deriv_e_acc_offload_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none -#ifdef HAVE_OPENACC_OFFLOAD - integer(c_int32_t) function qmckl_compute_factor_een_deriv_e_acc_offload & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - dim_cord_vect, & - cord_vect_full, & - lkpm_combined_index, & - tmp_c, & - dtmp_c, & - een_rescaled_n, & - een_rescaled_n_deriv_e, & - factor_een_deriv_e) & - bind(C) result(info) - - use, intrinsic :: iso_c_binding - implicit none - - integer (c_int64_t) , intent(in) , value :: context - integer (c_int64_t) , intent(in) , value :: walk_num - integer (c_int64_t) , intent(in) , value :: elec_num - integer (c_int64_t) , intent(in) , value :: nucl_num - integer (c_int64_t) , intent(in) , value :: cord_num - integer (c_int64_t) , intent(in) , value :: dim_cord_vect - real (c_double ) , intent(in) :: cord_vect_full(nucl_num,dim_cord_vect) - integer (c_int64_t) , intent(in) :: lkpm_combined_index(dim_cord_vect,4) - real (c_double ) , intent(in) :: tmp_c(elec_num,nucl_num,0:cord_num,0:cord_num-1,walk_num) - real (c_double ) , intent(in) :: dtmp_c(elec_num,4,nucl_num,0:cord_num,0:cord_num-1,walk_num) - real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) - real (c_double ) , intent(in) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) - real (c_double ) , intent(out) :: factor_een_deriv_e(elec_num,4,walk_num) - - integer(c_int32_t), external :: qmckl_compute_factor_een_deriv_e_acc_offload_f - info = qmckl_compute_factor_een_deriv_e_acc_offload_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - dim_cord_vect, & - cord_vect_full, & - lkpm_combined_index, & - tmp_c, & - dtmp_c, & - een_rescaled_n, & - een_rescaled_n_deriv_e, & - factor_een_deriv_e) - - end function qmckl_compute_factor_een_deriv_e_acc_offload -#endif - #+end_src - *** Test #+begin_src python :results output :exports none :noweb yes import numpy as np From 731fded4a80da4cf66380f6563da435c36a4b4c0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 10:50:51 +0200 Subject: [PATCH 033/100] warnings --- org/qmckl_determinant.org | 69 +++++++++++++++++---------------------- org/qmckl_nucleus.org | 37 ++++++++++----------- 2 files changed, 48 insertions(+), 58 deletions(-) diff --git a/org/qmckl_determinant.org b/org/qmckl_determinant.org index 0412db6..8c205fc 100644 --- a/org/qmckl_determinant.org +++ b/org/qmckl_determinant.org @@ -1134,36 +1134,28 @@ end function qmckl_compute_det_vgl_beta_f #+begin_src c :tangle (eval c_test) :exports none -#define walk_num chbrclf_walk_num -#define elec_num chbrclf_elec_num -#define shell_num chbrclf_shell_num -#define ao_num chbrclf_ao_num - -int64_t elec_up_num = chbrclf_elec_up_num; -int64_t elec_dn_num = chbrclf_elec_dn_num; double* elec_coord = &(chbrclf_elec_coord[0][0][0]); -const int64_t nucl_num = chbrclf_nucl_num; const double* nucl_charge = chbrclf_charge; const double* nucl_coord = &(chbrclf_nucl_coord[0][0]); -rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num); +rc = qmckl_set_electron_num (context, chbrclf_elec_up_num, chbrclf_elec_dn_num); assert (rc == QMCKL_SUCCESS); -rc = qmckl_set_electron_walk_num (context, walk_num); +rc = qmckl_set_electron_walk_num (context, chbrclf_walk_num); assert (rc == QMCKL_SUCCESS); assert(qmckl_electron_provided(context)); -rc = qmckl_set_electron_coord (context, 'N', elec_coord, walk_num*elec_num*3); +rc = qmckl_set_electron_coord (context, 'N', elec_coord, chbrclf_walk_num*chbrclf_elec_num*3); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_nucleus_num (context, nucl_num); +rc = qmckl_set_nucleus_num (context, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), nucl_num*3); +rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), chbrclf_nucl_num*3); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_nucleus_charge(context, nucl_charge, nucl_num); +rc = qmckl_set_nucleus_charge(context, nucl_charge, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(qmckl_nucleus_provided(context)); @@ -1195,27 +1187,27 @@ rc = qmckl_set_ao_basis_prim_num (context, chbrclf_prim_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_nucleus_index (context, nucleus_index, nucl_num); +rc = qmckl_set_ao_basis_nucleus_index (context, nucleus_index, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_nucleus_shell_num (context, nucleus_shell_num, nucl_num); +rc = qmckl_set_ao_basis_nucleus_shell_num (context, nucleus_shell_num, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_ang_mom (context, shell_ang_mom, shell_num); +rc = qmckl_set_ao_basis_shell_ang_mom (context, shell_ang_mom, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_factor (context, shell_factor, shell_num); +rc = qmckl_set_ao_basis_shell_factor (context, shell_factor, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_prim_num (context, shell_prim_num, shell_num); +rc = qmckl_set_ao_basis_shell_prim_num (context, shell_prim_num, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_prim_index (context, shell_prim_index, shell_num); +rc = qmckl_set_ao_basis_shell_prim_index (context, shell_prim_index, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); @@ -1239,14 +1231,13 @@ assert(rc == QMCKL_SUCCESS); assert(qmckl_ao_basis_provided(context)); -double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num]; +double ao_vgl[chbrclf_walk_num*chbrclf_elec_num][5][chbrclf_ao_num]; -rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num); +rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*chbrclf_walk_num*chbrclf_elec_num*chbrclf_ao_num); assert (rc == QMCKL_SUCCESS); /* Set up MO data */ -const int64_t mo_num = chbrclf_mo_num; -rc = qmckl_set_mo_basis_mo_num(context, mo_num); +rc = qmckl_set_mo_basis_mo_num(context, chbrclf_mo_num); assert (rc == QMCKL_SUCCESS); const double * mo_coefficient = &(chbrclf_mo_coef[0]); @@ -1256,31 +1247,31 @@ assert (rc == QMCKL_SUCCESS); assert(qmckl_mo_basis_provided(context)); -double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num]; -rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num); +double mo_vgl[chbrclf_walk_num*chbrclf_elec_num][5][chbrclf_mo_num]; +rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*chbrclf_walk_num*chbrclf_elec_num*chbrclf_mo_num); assert (rc == QMCKL_SUCCESS); /* Set up determinant data */ -const int64_t det_num_alpha = 1; -const int64_t det_num_beta = 1; -int64_t mo_index_alpha[det_num_alpha][walk_num][elec_up_num]; -int64_t mo_index_beta[det_num_alpha][walk_num][elec_dn_num]; +#define det_num_alpha 1 +#define det_num_beta 1 +int64_t mo_index_alpha[det_num_alpha][chbrclf_walk_num][chbrclf_elec_up_num]; +int64_t mo_index_beta[det_num_alpha][chbrclf_walk_num][chbrclf_elec_dn_num]; int i, j, k; for(k = 0; k < det_num_alpha; ++k) - for(i = 0; i < walk_num; ++i) - for(j = 0; j < elec_up_num; ++j) + for(i = 0; i < chbrclf_walk_num; ++i) + for(j = 0; j < chbrclf_elec_up_num; ++j) mo_index_alpha[k][i][j] = j + 1; for(k = 0; k < det_num_beta; ++k) - for(i = 0; i < walk_num; ++i) - for(j = 0; j < elec_up_num; ++j) + for(i = 0; i < chbrclf_walk_num; ++i) + for(j = 0; j < chbrclf_elec_up_num; ++j) mo_index_beta[k][i][j] = j + 1; rc = qmckl_set_determinant_type (context, typ); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_determinant_walk_num (context, walk_num); +rc = qmckl_set_determinant_walk_num (context, chbrclf_walk_num); assert (rc == QMCKL_SUCCESS); rc = qmckl_set_determinant_det_num_alpha (context, det_num_alpha); @@ -1297,8 +1288,8 @@ assert (rc == QMCKL_SUCCESS); // Get slater-determinant -double det_vgl_alpha[det_num_alpha][walk_num][5][elec_up_num][elec_up_num]; -double det_vgl_beta[det_num_beta][walk_num][5][elec_dn_num][elec_dn_num]; +double det_vgl_alpha[det_num_alpha][chbrclf_walk_num][5][chbrclf_elec_up_num][chbrclf_elec_up_num]; +double det_vgl_beta[det_num_beta][chbrclf_walk_num][5][chbrclf_elec_dn_num][chbrclf_elec_dn_num]; rc = qmckl_get_det_vgl_alpha(context, &(det_vgl_alpha[0][0][0][0][0])); assert (rc == QMCKL_SUCCESS); @@ -2047,8 +2038,8 @@ end function qmckl_compute_det_inv_matrix_beta_f #+begin_src c :tangle (eval c_test) :exports none // Get adjoint of the slater-determinant -double det_inv_matrix_alpha[det_num_alpha][walk_num][elec_up_num][elec_up_num]; -double det_inv_matrix_beta[det_num_beta][walk_num][elec_dn_num][elec_dn_num]; +double det_inv_matrix_alpha[det_num_alpha][chbrclf_walk_num][chbrclf_elec_up_num][chbrclf_elec_up_num]; +double det_inv_matrix_beta[det_num_beta][chbrclf_walk_num][chbrclf_elec_dn_num][chbrclf_elec_dn_num]; rc = qmckl_get_det_inv_matrix_alpha(context, &(det_inv_matrix_alpha[0][0][0][0])); assert (rc == QMCKL_SUCCESS); diff --git a/org/qmckl_nucleus.org b/org/qmckl_nucleus.org index 319d5d1..e191d3e 100644 --- a/org/qmckl_nucleus.org +++ b/org/qmckl_nucleus.org @@ -672,7 +672,6 @@ end interface ** Test #+begin_src c :tangle (eval c_test) -const int64_t nucl_num = chbrclf_nucl_num; const double* nucl_charge = chbrclf_charge; const double* nucl_coord = &(chbrclf_nucl_coord[0][0]); const double nucl_rescale_factor_kappa = 2.0; @@ -688,13 +687,13 @@ rc = qmckl_get_nucleus_num (context, &n); assert(rc == QMCKL_NOT_PROVIDED); -rc = qmckl_set_nucleus_num (context, nucl_num); +rc = qmckl_set_nucleus_num (context, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_nucleus_provided(context)); rc = qmckl_get_nucleus_num (context, &n); assert(rc == QMCKL_SUCCESS); -assert(n == nucl_num); +assert(n == chbrclf_nucl_num); double k; rc = qmckl_get_nucleus_rescale_factor (context, &k); @@ -709,41 +708,41 @@ rc = qmckl_get_nucleus_rescale_factor (context, &k); assert(rc == QMCKL_SUCCESS); assert(k == nucl_rescale_factor_kappa); -double nucl_coord2[3*nucl_num]; +double nucl_coord2[3*chbrclf_nucl_num]; -rc = qmckl_get_nucleus_coord (context, 'T', nucl_coord2, 3*nucl_num); +rc = qmckl_get_nucleus_coord (context, 'T', nucl_coord2, 3*chbrclf_nucl_num); assert(rc == QMCKL_NOT_PROVIDED); -rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), 3*nucl_num); +rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), 3*chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_nucleus_provided(context)); -rc = qmckl_get_nucleus_coord (context, 'N', nucl_coord2, 3*nucl_num); +rc = qmckl_get_nucleus_coord (context, 'N', nucl_coord2, 3*chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); for (size_t k=0 ; k<3 ; ++k) { - for (int64_t i=0 ; i Date: Tue, 5 Apr 2022 11:44:17 +0200 Subject: [PATCH 034/100] Fixed cppcheck --- .github/workflows/test-build.yml | 1 - org/qmckl_ao.org | 3 +- org/qmckl_blas.org | 6 +- org/qmckl_jastrow.org | 300 +++++++++++------------- org/qmckl_mo.org | 14 +- org/qmckl_sherman_morrison_woodbury.org | 2 +- 6 files changed, 154 insertions(+), 172 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index c7af73f..b4e5b6c 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -4,7 +4,6 @@ on: push: branches: [ master ] pull_request: - branches: [ master ] jobs: x86_ubuntu: diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8dd1067..8289aa4 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { } } - rc = QMCKL_SUCCESS; #ifdef HAVE_HPC rc = qmckl_finalize_basis_hpc(context); +#else + rc = QMCKL_SUCCESS; #endif return rc; diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 9cd7e18..1cf76e4 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -84,8 +84,8 @@ are not intended to be passed to external codes. #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_vector { - int64_t size; double* restrict data; + int64_t size; } qmckl_vector; #+end_src @@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { - int64_t size[2]; double* restrict data; + int64_t size[2]; } qmckl_matrix; #+end_src @@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context, #define QMCKL_TENSOR_ORDER_MAX 16 typedef struct qmckl_tensor { + double* restrict data; int64_t order; int64_t size[QMCKL_TENSOR_ORDER_MAX]; - double* restrict data; } qmckl_tensor; #+end_src diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ffbf713..6eaad62 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -837,7 +837,7 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context, } if (ctx->jastrow.type_nucl_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); + rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_type_nucl_vector", @@ -896,7 +896,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context, } if (ctx->jastrow.aord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.aord_vector); + rc = qmckl_free(context, ctx->jastrow.aord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -959,7 +959,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context, } if (ctx->jastrow.bord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.bord_vector); + rc = qmckl_free(context, ctx->jastrow.bord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1029,7 +1029,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, } if (ctx->jastrow.cord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.cord_vector); + rc = qmckl_free(context, ctx->jastrow.cord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1388,12 +1388,11 @@ qmckl_exit_code qmckl_provide_asymp_jasb(qmckl_context context) ctx->jastrow.asymp_jasb = asymp_jasb; } - qmckl_exit_code rc = - qmckl_compute_asymp_jasb(context, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - rescale_factor_kappa_ee, - ctx->jastrow.asymp_jasb); + rc = qmckl_compute_asymp_jasb(context, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + rescale_factor_kappa_ee, + ctx->jastrow.asymp_jasb); if (rc != QMCKL_SUCCESS) { return rc; } @@ -1470,10 +1469,6 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const double rescale_factor_kappa_ee, double* const asymp_jasb ) { - double kappa_inv, x, asym_one; - - kappa_inv = 1.0 / rescale_factor_kappa_ee; - if (context == QMCKL_NULL_CONTEXT){ return QMCKL_INVALID_CONTEXT; } @@ -1482,14 +1477,15 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( return QMCKL_INVALID_ARG_2; } - asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); + const double kappa_inv = 1.0 / rescale_factor_kappa_ee; + const double asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); asymp_jasb[0] = asym_one; asymp_jasb[1] = 0.5 * asym_one; for (int i = 0 ; i <= 1; ++i) { - x = kappa_inv; + double x = kappa_inv; for (int p = 1; p < bord_num; ++p){ - x = x * kappa_inv; + x *= kappa_inv; asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x; } } @@ -1672,16 +1668,15 @@ qmckl_exit_code qmckl_provide_factor_ee(qmckl_context context) ctx->jastrow.factor_ee = factor_ee; } - qmckl_exit_code rc = - qmckl_compute_factor_ee(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee); + rc = qmckl_compute_factor_ee(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2014,17 +2009,16 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) ctx->jastrow.factor_ee_deriv_e = factor_ee_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_ee_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->electron.ee_distance_rescaled_deriv_e, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee_deriv_e); + rc = qmckl_compute_factor_ee_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->electron.ee_distance_rescaled_deriv_e, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2437,21 +2431,20 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) ctx->jastrow.factor_en = factor_en; } - qmckl_exit_code rc = - qmckl_compute_factor_en(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->jastrow.factor_en); + rc = qmckl_compute_factor_en(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->jastrow.factor_en); if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2784,18 +2777,17 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context) ctx->jastrow.factor_en_deriv_e = factor_en_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_en_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->electron.en_distance_rescaled_deriv_e, - ctx->jastrow.factor_en_deriv_e); + rc = qmckl_compute_factor_en_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->electron.en_distance_rescaled_deriv_e, + ctx->jastrow.factor_en_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3203,14 +3195,13 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) ctx->jastrow.een_rescaled_e = een_rescaled_e; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e); + rc = qmckl_compute_een_rescaled_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3537,16 +3528,15 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e = een_rescaled_e_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_e_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.coord_new.data, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_e_deriv_e); + rc = qmckl_compute_factor_een_rescaled_e_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.coord_new.data, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_e_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3917,15 +3907,14 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context) ctx->jastrow.een_rescaled_n = een_rescaled_n; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_n(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n); + rc = qmckl_compute_een_rescaled_n(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4256,18 +4245,17 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n_deriv_e = een_rescaled_n_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_n_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.coord_new.data, - ctx->nucleus.coord.data, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e); + rc = qmckl_compute_factor_een_rescaled_n_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.coord_new.data, + ctx->nucleus.coord.data, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4758,14 +4746,13 @@ qmckl_exit_code qmckl_provide_cord_vect_full(qmckl_context context) ctx->jastrow.cord_vect_full = cord_vect_full; } - qmckl_exit_code rc = - qmckl_compute_cord_vect_full(context, - ctx->nucleus.num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.cord_vector, - ctx->jastrow.cord_vect_full); + rc = qmckl_compute_cord_vect_full(context, + ctx->nucleus.num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.cord_vector, + ctx->jastrow.cord_vect_full); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4809,11 +4796,10 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context) ctx->jastrow.lkpm_combined_index = lkpm_combined_index; } - qmckl_exit_code rc = - qmckl_compute_lkpm_combined_index(context, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.lkpm_combined_index); + rc = qmckl_compute_lkpm_combined_index(context, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.lkpm_combined_index); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4858,15 +4844,14 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c = tmp_c; } - qmckl_exit_code rc = - qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4899,7 +4884,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4911,15 +4896,14 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = - qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5943,18 +5927,17 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context) ctx->jastrow.factor_een = factor_een; } - qmckl_exit_code rc = - qmckl_compute_factor_een(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.factor_een); + rc = qmckl_compute_factor_een(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.factor_een); if (rc != QMCKL_SUCCESS) { return rc; } @@ -6456,20 +6439,19 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.dtmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e, - ctx->jastrow.factor_een_deriv_e); + rc = qmckl_compute_factor_een_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.dtmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e, + ctx->jastrow.factor_een_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index d920396..0928db6 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -849,13 +849,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, int64_t n0 = nidx-4; n0 = n0 < 0 ? 0 : n0; - for (int64_t n=n0 ; n < nidx ; n+=1) { - const double* restrict ck = coef_normalized_t + idx[n]*mo_num; - const double a1 = av1[n]; - const double a2 = av2[n]; - const double a3 = av3[n]; - const double a4 = av4[n]; - const double a5 = av5[n]; + for (int64_t m=n0 ; m < nidx ; m+=1) { + const double* restrict ck = coef_normalized_t + idx[m]*mo_num; + const double a1 = av1[m]; + const double a2 = av2[m]; + const double a3 = av3[m]; + const double a4 = av4[m]; + const double a5 = av5[m]; #ifdef HAVE_OPENMP #pragma omp simd diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 598ad32..ae358e8 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context, rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant); if (rc != 0) { // Send the entire block to slagel_splitting uint64_t l = 0; - rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, + (void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant); later = later + l; } From 94035929e431eab6a475f287708a0fc5babb427a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 11:44:17 +0200 Subject: [PATCH 035/100] Fixed cppcheck --- .github/workflows/test-build.yml | 2 - org/qmckl_ao.org | 3 +- org/qmckl_blas.org | 6 +- org/qmckl_jastrow.org | 300 +++++++++++------------- org/qmckl_mo.org | 14 +- org/qmckl_sherman_morrison_woodbury.org | 2 +- 6 files changed, 154 insertions(+), 173 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index c7af73f..12d4503 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -2,9 +2,7 @@ name: test-build on: push: - branches: [ master ] pull_request: - branches: [ master ] jobs: x86_ubuntu: diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8dd1067..8289aa4 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { } } - rc = QMCKL_SUCCESS; #ifdef HAVE_HPC rc = qmckl_finalize_basis_hpc(context); +#else + rc = QMCKL_SUCCESS; #endif return rc; diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 9cd7e18..1cf76e4 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -84,8 +84,8 @@ are not intended to be passed to external codes. #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_vector { - int64_t size; double* restrict data; + int64_t size; } qmckl_vector; #+end_src @@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { - int64_t size[2]; double* restrict data; + int64_t size[2]; } qmckl_matrix; #+end_src @@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context, #define QMCKL_TENSOR_ORDER_MAX 16 typedef struct qmckl_tensor { + double* restrict data; int64_t order; int64_t size[QMCKL_TENSOR_ORDER_MAX]; - double* restrict data; } qmckl_tensor; #+end_src diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ffbf713..6eaad62 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -837,7 +837,7 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context, } if (ctx->jastrow.type_nucl_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); + rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_type_nucl_vector", @@ -896,7 +896,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context, } if (ctx->jastrow.aord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.aord_vector); + rc = qmckl_free(context, ctx->jastrow.aord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -959,7 +959,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context, } if (ctx->jastrow.bord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.bord_vector); + rc = qmckl_free(context, ctx->jastrow.bord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1029,7 +1029,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, } if (ctx->jastrow.cord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.cord_vector); + rc = qmckl_free(context, ctx->jastrow.cord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1388,12 +1388,11 @@ qmckl_exit_code qmckl_provide_asymp_jasb(qmckl_context context) ctx->jastrow.asymp_jasb = asymp_jasb; } - qmckl_exit_code rc = - qmckl_compute_asymp_jasb(context, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - rescale_factor_kappa_ee, - ctx->jastrow.asymp_jasb); + rc = qmckl_compute_asymp_jasb(context, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + rescale_factor_kappa_ee, + ctx->jastrow.asymp_jasb); if (rc != QMCKL_SUCCESS) { return rc; } @@ -1470,10 +1469,6 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const double rescale_factor_kappa_ee, double* const asymp_jasb ) { - double kappa_inv, x, asym_one; - - kappa_inv = 1.0 / rescale_factor_kappa_ee; - if (context == QMCKL_NULL_CONTEXT){ return QMCKL_INVALID_CONTEXT; } @@ -1482,14 +1477,15 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( return QMCKL_INVALID_ARG_2; } - asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); + const double kappa_inv = 1.0 / rescale_factor_kappa_ee; + const double asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); asymp_jasb[0] = asym_one; asymp_jasb[1] = 0.5 * asym_one; for (int i = 0 ; i <= 1; ++i) { - x = kappa_inv; + double x = kappa_inv; for (int p = 1; p < bord_num; ++p){ - x = x * kappa_inv; + x *= kappa_inv; asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x; } } @@ -1672,16 +1668,15 @@ qmckl_exit_code qmckl_provide_factor_ee(qmckl_context context) ctx->jastrow.factor_ee = factor_ee; } - qmckl_exit_code rc = - qmckl_compute_factor_ee(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee); + rc = qmckl_compute_factor_ee(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2014,17 +2009,16 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) ctx->jastrow.factor_ee_deriv_e = factor_ee_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_ee_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->electron.ee_distance_rescaled_deriv_e, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee_deriv_e); + rc = qmckl_compute_factor_ee_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->electron.ee_distance_rescaled_deriv_e, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2437,21 +2431,20 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) ctx->jastrow.factor_en = factor_en; } - qmckl_exit_code rc = - qmckl_compute_factor_en(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->jastrow.factor_en); + rc = qmckl_compute_factor_en(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->jastrow.factor_en); if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2784,18 +2777,17 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context) ctx->jastrow.factor_en_deriv_e = factor_en_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_en_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->electron.en_distance_rescaled_deriv_e, - ctx->jastrow.factor_en_deriv_e); + rc = qmckl_compute_factor_en_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->electron.en_distance_rescaled_deriv_e, + ctx->jastrow.factor_en_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3203,14 +3195,13 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) ctx->jastrow.een_rescaled_e = een_rescaled_e; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e); + rc = qmckl_compute_een_rescaled_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3537,16 +3528,15 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e = een_rescaled_e_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_e_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.coord_new.data, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_e_deriv_e); + rc = qmckl_compute_factor_een_rescaled_e_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.coord_new.data, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_e_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3917,15 +3907,14 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context) ctx->jastrow.een_rescaled_n = een_rescaled_n; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_n(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n); + rc = qmckl_compute_een_rescaled_n(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4256,18 +4245,17 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n_deriv_e = een_rescaled_n_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_n_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.coord_new.data, - ctx->nucleus.coord.data, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e); + rc = qmckl_compute_factor_een_rescaled_n_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.coord_new.data, + ctx->nucleus.coord.data, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4758,14 +4746,13 @@ qmckl_exit_code qmckl_provide_cord_vect_full(qmckl_context context) ctx->jastrow.cord_vect_full = cord_vect_full; } - qmckl_exit_code rc = - qmckl_compute_cord_vect_full(context, - ctx->nucleus.num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.cord_vector, - ctx->jastrow.cord_vect_full); + rc = qmckl_compute_cord_vect_full(context, + ctx->nucleus.num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.cord_vector, + ctx->jastrow.cord_vect_full); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4809,11 +4796,10 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context) ctx->jastrow.lkpm_combined_index = lkpm_combined_index; } - qmckl_exit_code rc = - qmckl_compute_lkpm_combined_index(context, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.lkpm_combined_index); + rc = qmckl_compute_lkpm_combined_index(context, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.lkpm_combined_index); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4858,15 +4844,14 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c = tmp_c; } - qmckl_exit_code rc = - qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4899,7 +4884,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4911,15 +4896,14 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = - qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5943,18 +5927,17 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context) ctx->jastrow.factor_een = factor_een; } - qmckl_exit_code rc = - qmckl_compute_factor_een(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.factor_een); + rc = qmckl_compute_factor_een(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.factor_een); if (rc != QMCKL_SUCCESS) { return rc; } @@ -6456,20 +6439,19 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.dtmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e, - ctx->jastrow.factor_een_deriv_e); + rc = qmckl_compute_factor_een_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.dtmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e, + ctx->jastrow.factor_een_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index d920396..0928db6 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -849,13 +849,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, int64_t n0 = nidx-4; n0 = n0 < 0 ? 0 : n0; - for (int64_t n=n0 ; n < nidx ; n+=1) { - const double* restrict ck = coef_normalized_t + idx[n]*mo_num; - const double a1 = av1[n]; - const double a2 = av2[n]; - const double a3 = av3[n]; - const double a4 = av4[n]; - const double a5 = av5[n]; + for (int64_t m=n0 ; m < nidx ; m+=1) { + const double* restrict ck = coef_normalized_t + idx[m]*mo_num; + const double a1 = av1[m]; + const double a2 = av2[m]; + const double a3 = av3[m]; + const double a4 = av4[m]; + const double a5 = av5[m]; #ifdef HAVE_OPENMP #pragma omp simd diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 598ad32..ae358e8 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context, rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant); if (rc != 0) { // Send the entire block to slagel_splitting uint64_t l = 0; - rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, + (void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant); later = later + l; } From 586eb928013f34376fc9ac8a4d54b2384fec2977 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Tue, 5 Apr 2022 14:23:20 +0200 Subject: [PATCH 036/100] compute_cord_vect_full done --- org/qmckl_jastrow.org | 123 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 22 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 5d600f6..14e1f1e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5055,7 +5055,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( | ~cord_vect_full~ | ~double[dim_cord_vect][nucl_num]~ | out | Full list of coefficients | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_cord_vect_full_f( & +integer function qmckl_compute_cord_vect_full_doc_f( & context, nucl_num, dim_cord_vect, type_nucl_num, & type_nucl_vector, cord_vector, cord_vect_full) & result(info) @@ -5098,29 +5098,14 @@ integer function qmckl_compute_cord_vect_full_f( & cord_vect_full(a,1:dim_cord_vect) = cord_vector(type_nucl_vector(a),1:dim_cord_vect) end do -end function qmckl_compute_cord_vect_full_f +end function qmckl_compute_cord_vect_full_doc_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_cord_vect_full ( - const qmckl_context context, - const int64_t nucl_num, - const int64_t dim_cord_vect, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const double* cord_vector, - double* const cord_vect_full ); - #+end_src - - - #+CALL: generate_c_interface(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname="qmckl_compute_cord_vect_full_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_cord_vect_full & + integer(c_int32_t) function qmckl_compute_cord_vect_full_doc & (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & bind(C) result(info) @@ -5135,13 +5120,106 @@ end function qmckl_compute_cord_vect_full_f real (c_double ) , intent(in) :: cord_vector(type_nucl_num,dim_cord_vect) real (c_double ) , intent(out) :: cord_vect_full(nucl_num,dim_cord_vect) - integer(c_int32_t), external :: qmckl_compute_cord_vect_full_f - info = qmckl_compute_cord_vect_full_f & + integer(c_int32_t), external :: qmckl_compute_cord_vect_full_doc_f + info = qmckl_compute_cord_vect_full_doc_f & (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) - end function qmckl_compute_cord_vect_full + end function qmckl_compute_cord_vect_full_doc #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_cord_vect_full_hpc ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ) { + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (type_nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + if (dim_cord_vect <= 0) { + return QMCKL_INVALID_ARG_5; + } + + for (int i=0; i < dim_cord_vect; ++i) { + for (int a=0; a < nucl_num; ++a){ + cord_vect_full[a + i*nucl_num] = cord_vector[(type_nucl_vector[a]-1)+i*type_nucl_num]; + } + } + + return QMCKL_SUCCESS; + } + #+end_src + + + #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname="qmckl_compute_cord_vect_full_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_cord_vect_full ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_cord_vect_full_doc ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_cord_vect_full_hpc ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_cord_vect_full ( + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ) { + + #ifdef HAVE_HPC + return qmckl_compute_cord_vect_full_hpc(context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full); + #else + return qmckl_compute_cord_vect_full_doc(context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full); + #endif + } + #+end_src + + + *** Compute lkpm_combined_index :PROPERTIES: :Name: qmckl_compute_lkpm_combined_index @@ -6339,6 +6417,7 @@ double factor_een[walk_num]; rc = qmckl_get_jastrow_factor_een(context, &(factor_een[0]),walk_num); assert(fabs(factor_een[0] + 0.37407972141304213) < 1e-12); +return QMCKL_SUCCESS; #+end_src ** Electron-electron-nucleus Jastrow \(f_{een}\) derivative From eb71a752f5bfc8a3afa04d0f0995078163bd35c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Tue, 5 Apr 2022 14:28:35 +0200 Subject: [PATCH 037/100] Fixed naive GPU kernels and ignored variable issue --- org/ao_grid.f90 | 114 -------------------------- org/qmckl_jastrow.org | 186 +++++++++++++++++++++++++++--------------- 2 files changed, 120 insertions(+), 180 deletions(-) delete mode 100644 org/ao_grid.f90 diff --git a/org/ao_grid.f90 b/org/ao_grid.f90 deleted file mode 100644 index 685313f..0000000 --- a/org/ao_grid.f90 +++ /dev/null @@ -1,114 +0,0 @@ -subroutine qmckl_check_error(rc, message) - use qmckl - implicit none - integer(qmckl_exit_code), intent(in) :: rc - character(len=*) , intent(in) :: message - character(len=128) :: str_buffer - if (rc /= QMCKL_SUCCESS) then - print *, message - call qmckl_string_of_error(rc, str_buffer) - print *, str_buffer - call exit(rc) - end if -end subroutine qmckl_check_error - -program ao_grid - use qmckl - implicit none - - integer(qmckl_context) :: qmckl_ctx ! QMCkl context - integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions - - character(len=128) :: trexio_filename - character(len=128) :: str_buffer - integer :: ao_id - integer :: point_num_x - - integer(c_int64_t) :: nucl_num - double precision, allocatable :: nucl_coord(:,:) - - integer(c_int64_t) :: point_num - integer(c_int64_t) :: ao_num - integer(c_int64_t) :: ipoint, i, j, k - double precision :: x, y, z, dr(3) - double precision :: rmin(3), rmax(3) - double precision, allocatable :: points(:,:) - double precision, allocatable :: ao_vgl(:,:,:) - -if (iargc() /= 3) then - print *, 'Syntax: ao_grid ' - call exit(-1) -end if -call getarg(1, trexio_filename) -call getarg(2, str_buffer) -read(str_buffer, *) ao_id -call getarg(3, str_buffer) -read(str_buffer, *) point_num_x - -if (point_num_x < 0 .or. point_num_x > 300) then - print *, 'Error: 0 < point_num < 300' - call exit(-1) -end if - -qmckl_ctx = qmckl_context_create() -rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename))) -call qmckl_check_error(rc, 'Read TREXIO') - -rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) -call qmckl_check_error(rc, 'Getting ao_num') - -if (ao_id < 0 .or. ao_id > ao_num) then - print *, 'Error: 0 < ao_id < ', ao_num - call exit(-1) -end if - -rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num) -call qmckl_check_error(rc, 'Get nucleus num') - -allocate( nucl_coord(3, nucl_num) ) -rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num) -call qmckl_check_error(rc, 'Get nucleus coord') - -rmin(1) = minval( nucl_coord(1,:) ) - 5.d0 -rmin(2) = minval( nucl_coord(2,:) ) - 5.d0 -rmin(3) = minval( nucl_coord(3,:) ) - 5.d0 - -rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0 -rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0 -rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0 - -dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1) - -point_num = point_num_x**3 -allocate( points(point_num, 3) ) -ipoint=0 -z = rmin(3) -do k=1,point_num_x - y = rmin(2) - do j=1,point_num_x - x = rmin(1) - do i=1,point_num_x - ipoint = ipoint+1 - points(ipoint,1) = x - points(ipoint,2) = y - points(ipoint,3) = z - x = x + dr(1) - end do - y = y + dr(2) - end do - z = z + dr(3) -end do - -rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) -call qmckl_check_error(rc, 'Setting points') - -allocate( ao_vgl(ao_num, 5, point_num) ) -rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) -call qmckl_check_error(rc, 'Setting points') - -do ipoint=1, point_num - print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) -end do - -deallocate( nucl_coord, points, ao_vgl ) -end program ao_grid diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 46bb5da..e69088c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4889,57 +4889,65 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } + /* Choose the correct compute function (depending on offload type) */ - bool default_compute = true; + switch(ctx->jastrow.offload_type) { + case OFFLOAD_OPENACC: + #ifdef HAVE_OPENACC_OFFLOAD + rc = + qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #elif + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); - #ifdef HAVE_OPENACC_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { - qmckl_exit_code rc = - qmckl_compute_tmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } + #endif + break; + case OFFLOAD_CUBLAS: + #ifdef HAVE_CUBLAS_OFFLOAD + rc = + qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #elif + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #endif + break; + default: + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + break; } - #endif - - #ifdef HAVE_CUBLAS_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) { - qmckl_exit_code rc = - qmckl_compute_tmp_c_cublas_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } - } - #endif - - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - if (rc != QMCKL_SUCCESS) { - return rc; - } - ctx->jastrow.tmp_c_date = ctx->date; } @@ -4980,15 +4988,61 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } + switch(ctx->jastrow.offload_type) { + case OFFLOAD_OPENACC: + #ifdef HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #elif + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #endif + break; + case OFFLOAD_CUBLAS: + #ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #elif + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #endif + break; + default: + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + break; + } - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5617,7 +5671,6 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( const double* een_rescaled_n, double* const tmp_c ) { - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5649,8 +5702,8 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( int stride_j_n = stride_k_n * nucl_num; int stride_nw_n = stride_j_n * (cord_num+1); - //#pragma acc parallel - //#pragma acc loop independent gang worker vector collapse(5) + #pragma acc parallel + #pragma acc loop independent gang worker vector collapse(5) for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i Date: Tue, 5 Apr 2022 14:37:57 +0200 Subject: [PATCH 038/100] Fix preprocessor else and remove old cuBLAS interface --- org/qmckl_blas.org | 92 ------------------------------------------- org/qmckl_jastrow.org | 8 ++-- 2 files changed, 4 insertions(+), 96 deletions(-) diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 4f83705..1cf76e4 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -2288,98 +2288,6 @@ qmckl_transpose (qmckl_context context, #+end_src -* cuBLAS interface (optional) -We propose a cuBLAS version of some QMCkl kernels. However, because cuBLAS is written in C, we need to define a Fortran interface for it. We start by defining functions to manage the cuBLAS handle structure from Fortran, before writing interfaces for the specific cuBLAS functions we are interested in. - -TODO These are the C functions that are supposed to be called from Fortran. We still need to write the interfaces themselves. - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -#include -#endif -#+end_src - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -cublasHandle_t* get_cublas_handle_interfaced(); -#endif -#+end_src - -#+begin_src c :comments org :tangle (eval c) :exports none -#ifdef HAVE_CUBLAS_OFFLOAD -cublasHandle_t* get_cublas_handle_interfaced() { - cublasHandle_t* handle = malloc(sizeof(cublasHandle_t)); - - cublasStatus_t status = cublasCreate(handle); - if (status != CUBLAS_STATUS_SUCCESS){ - fprintf(stderr, "Error while initializing cuBLAS\n"); - exit(1); - } - - return handle; -} -#endif -#+end_src - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -void destroy_cublas_handle_interfaced(cublasHandle_t* handle); -#endif -#+end_src - -#+begin_src c :comments org :tangle (eval c) :exports none -#ifdef HAVE_CUBLAS_OFFLOAD -void destroy_cublas_handle_interfaced(cublasHandle_t* handle) { - if(handle != NULL) { - free(handle); - } -} -#endif -#+end_src - -** DGEMM - -#+begin_src c :tangle (eval h_private_func) :comments org -#ifdef HAVE_CUBLAS_OFFLOAD -cublasStatus_t cublasDgemm_f( - cublasHandle_t* handle, - cublasOperation_t* transa, cublasOperation_t* transb, - int* m, int* n, int* k, - const double* alpha, - const double*A, int* lda, - const double* B, int* ldb, - const double* beta, - double*C, int* ldc -); -#endif -#+end_src - -#+begin_src c :comments org :tangle (eval c) :exports none - -#ifdef HAVE_CUBLAS_OFFLOAD -cublasStatus_t cublasDgemm_f( - cublasHandle_t* handle, - cublasOperation_t* transa, cublasOperation_t* transb, - int* m, int* n, int* k, - const double* alpha, - const double*A, int* lda, - const double* B, int* ldb, - const double* beta, - double*C, int* ldc -) { - return cublasDgemm_f( - handle, - transa, transb, - m, n, k, - alpha, A, lda, B,ldb, - beta, C, ldc - ); -} -#endif -#+end_src - - - * End of files :noexport: diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index e69088c..970feb7 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4903,7 +4903,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); - #elif + #else rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -4926,7 +4926,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); - #elif + #else rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -4999,7 +4999,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - #elif + #else rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -5020,7 +5020,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - #elif + #else rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, From 63c7f8ea72d4c9c14e4c97f28edf64d883d9a9d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Tue, 5 Apr 2022 16:29:52 +0200 Subject: [PATCH 039/100] Replace placeholder cuBLAS kernels with new C HPC implementation --- org/qmckl_jastrow.org | 110 +++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 970feb7..c602d84 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5783,17 +5783,6 @@ qmckl_exit_code qmckl_compute_tmp_c_cublas_offload ( const double* een_rescaled_n, double* const tmp_c ) { - qmckl_exit_code info; - int i, j, a, l, kk, p, lmax, nw; - char TransA, TransB; - double alpha, beta; - int M, N, K, LDA, LDB, LDC; - - TransA = 'N'; - TransB = 'N'; - alpha = 1.0; - beta = 0.0; - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5810,29 +5799,40 @@ qmckl_exit_code qmckl_compute_tmp_c_cublas_offload ( return QMCKL_INVALID_ARG_4; } - M = elec_num; - N = nucl_num*(cord_num + 1); - K = elec_num; + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_5; + } - LDA = sizeof(een_rescaled_e)/sizeof(double); - LDB = sizeof(een_rescaled_n)/sizeof(double); - LDC = sizeof(tmp_c)/sizeof(double); + qmckl_exit_code info = QMCKL_SUCCESS; - // TODO Replace with cuBLAS calls - for (int nw=0; nw < walk_num; ++nw) { - for (int i=0; i Date: Tue, 5 Apr 2022 16:52:35 +0200 Subject: [PATCH 040/100] Fix info --- org/qmckl_mo.org | 1 + 1 file changed, 1 insertion(+) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 0928db6..5010283 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -655,6 +655,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & end if end do end do + info = QMCKL_SUCCESS end function qmckl_compute_mo_basis_mo_vgl_doc_f #+end_src From 0489831e18118b9d9888058729fa59302465bc86 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 17:06:29 +0200 Subject: [PATCH 041/100] Simplified configure --- configure.ac | 50 ++++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index ffa8f99..474442e 100644 --- a/configure.ac +++ b/configure.ac @@ -137,10 +137,10 @@ case "$with_chameleon" in [PKG_CFLAGS="$PKG_CFLAGS $LIBCHAMELEON_CFLAGS" PKG_LIBS="$PKG_LIBS $LIBCHAMELEON_LIBS"] ,[ - + ## something went wrong. ## try to find the package without pkg-config - + ## check that the library is actually new enough. ## by testing for a 1.0.0+ function which we use AC_CHECK_LIB(chameleon,CHAMELEON_finalize,[LIBCHAMELEON_LIBS="-lchameleon"]) @@ -205,18 +205,11 @@ case $FC in ;; *nvfortran*) - FCFLAGS="$FCFLAGS -fPIC -Mnomain -mp -target=gpu" + FCFLAGS="$FCFLAGS -fPIC -Mnomain" ;; esac -case $CC in - - *nvc*) - CFLAGS="$CFLAGS -fPIC -mp -target=gpu" - ;; -esac - # Options. AC_ARG_ENABLE(hpc, [AS_HELP_STRING([--enable-hpc],[Use HPC-optimized functions])], HAVE_HPC=$enableval, HAVE_HPC=no) @@ -243,25 +236,38 @@ fi ## Enable GPU offloading # OpenACC offloading -AC_ARG_ENABLE(openacc-offload, [AS_HELP_STRING([--openacc-offload],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) +AC_ARG_ENABLE(enable-openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ - AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) - CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" - FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" + AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) + case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fPIC -fopenacc" + ;; + *nvc*) + CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + ;; + esac + + CFLAGS="$CFLAGS" +# FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) # cuBLAS offloading -AC_ARG_ENABLE(cublas-offload, [AS_HELP_STRING([--cublas-offload],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) +AC_ARG_ENABLE(enable-cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ - AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) - FCFLAGS="-DHAVE_CUBLAS_OFFLOAD" + AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) + case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fPIC -fopenacc" + ;; + *nvc*) + CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + ;; + esac ]) -# General offload -AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes" || test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ - CFLAGS="$OFFLOAD_FLAGS $OFFLOAD_CFLAGS $CFLAGS" - FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS $FCFLAGS" -]) ## From 08f01ece894837da3328f7c352beca1399ebf8ad Mon Sep 17 00:00:00 2001 From: 2323 Date: Tue, 5 Apr 2022 17:57:56 +0200 Subject: [PATCH 042/100] Fix configure --- configure.ac | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 474442e..8013725 100644 --- a/configure.ac +++ b/configure.ac @@ -210,6 +210,16 @@ case $FC in esac +case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fPIC" + ;; + *nvc*) + CFLAGS="$CFLAGS -fPIC" + ;; +esac + # Options. AC_ARG_ENABLE(hpc, [AS_HELP_STRING([--enable-hpc],[Use HPC-optimized functions])], HAVE_HPC=$enableval, HAVE_HPC=no) @@ -236,34 +246,32 @@ fi ## Enable GPU offloading # OpenACC offloading -AC_ARG_ENABLE(enable-openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) +AC_ARG_ENABLE(openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) case $CC in *gcc*) - CFLAGS="$CFLAGS -fPIC -fopenacc" + CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + CFLAGS="$CFLAGS -mp -target=gpu" ;; esac - CFLAGS="$CFLAGS" -# FCFLAGS="$OFFLOAD_FLAGS $OFFLOAD_FCFLAGS -DHAVE_OPENACC_OFFLOAD $FCFLAGS" ]) # cuBLAS offloading -AC_ARG_ENABLE(enable-cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) +AC_ARG_ENABLE(cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) case $CC in *gcc*) - CFLAGS="$CFLAGS -fPIC -fopenacc" + CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -fPIC -mp -target=gpu" + CFLAGS="$CFLAGS -mp -target=gpu" ;; esac ]) From f02e761b7939888220328220c4ad398c777f7c24 Mon Sep 17 00:00:00 2001 From: 2323 Date: Tue, 5 Apr 2022 19:31:11 +0200 Subject: [PATCH 043/100] Fixed configure.ac for GPUs --- configure.ac | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 8013725..38df570 100644 --- a/configure.ac +++ b/configure.ac @@ -255,7 +255,17 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -mp -target=gpu" + CFLAGS="$CFLAGS -acc -mp -target=gpu" + ;; + esac + + case $FC in + + *gfortran*) + FCFLAGS="$FCFLAGS -fopenacc" + ;; + *nvfortran*) + FCFLAGS="$FCFLAGS -acc -mp -target=gpu" ;; esac @@ -271,7 +281,17 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -mp -target=gpu" + CFLAGS="$CFLAGS -acc -mp -target=gpu" + ;; + esac + + case $FC in + + *gfortran*) + FCFLAGS="$FCFLAGS -fopenacc" + ;; + *nvfortran*) + FCFLAGS="$FCFLAGS -acc -mp -target=gpu" ;; esac ]) From 72fad819bf35cdf96a96b9d6ffc0880c570d014f Mon Sep 17 00:00:00 2001 From: 2323 Date: Wed, 6 Apr 2022 10:03:56 +0200 Subject: [PATCH 044/100] Fix flags --- configure.ac | 9 +++++---- org/qmckl_jastrow.org | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 38df570..835f3f9 100644 --- a/configure.ac +++ b/configure.ac @@ -93,6 +93,7 @@ AC_PROG_F77 m4_version_prereq([2.70],[], [AC_PROG_CC_C99]) AS_IF([test "$ac_cv_prog_cc_c99" = "no"], [AC_MSG_ERROR([The compiler does not support C99])]) AC_PROG_CC_C_O +AM_PROG_CC_C_O AC_PROG_FC AC_PROG_FC_C_O AC_FC_PP_DEFINE @@ -255,7 +256,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc -mp -target=gpu" + CFLAGS="$CFLAGS -acc" ;; esac @@ -265,7 +266,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc -mp -target=gpu" + FCFLAGS="$FCFLAGS -acc" ;; esac @@ -281,7 +282,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc -mp -target=gpu" + CFLAGS="$CFLAGS -acc" ;; esac @@ -291,7 +292,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc -mp -target=gpu" + FCFLAGS="$FCFLAGS -acc" ;; esac ]) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index c602d84..017e372 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -737,7 +737,7 @@ qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmc "offload_type is a null pointer"); } - qmckl_context_struct* const ctx = (qmckl_context_struct* const) context; + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; assert (ctx != NULL); int32_t mask = 1 << 0; From 0966e1e2b1f38082419b00d760131579de40bfc4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 6 Apr 2022 10:42:00 +0200 Subject: [PATCH 045/100] Fix OpenACC --- org/qmckl_jastrow.org | 1060 +++++++++++++++++++++-------------------- 1 file changed, 540 insertions(+), 520 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 017e372..7b5bcee 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1509,11 +1509,11 @@ end function qmckl_compute_asymp_jasb_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_asymp_jasb ( - const qmckl_context context, - const int64_t bord_num, - const double* bord_vector, - const double rescale_factor_kappa_ee, - double* const asymp_jasb ) { + const qmckl_context context, + const int64_t bord_num, + const double* bord_vector, + const double rescale_factor_kappa_ee, + double* const asymp_jasb ) { if (context == QMCKL_NULL_CONTEXT){ return QMCKL_INVALID_CONTEXT; @@ -1545,11 +1545,11 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_asymp_jasb ( - const qmckl_context context, - const int64_t bord_num, - const double* bord_vector, - const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + const qmckl_context context, + const int64_t bord_num, + const double* bord_vector, + const double rescale_factor_kappa_ee, + double* const asymp_jasb ); #+end_src @@ -1827,15 +1827,15 @@ end function qmckl_compute_factor_ee_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_ee ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* asymp_jasb, - double* const factor_ee ) { + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ) { int ipar; // can we use a smaller integer? double x, x1, spin_fact, power_ser; @@ -1896,15 +1896,15 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_ee ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* asymp_jasb, - double* const factor_ee ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ); #+end_src @@ -2207,16 +2207,16 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, - double* const factor_ee_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ); #+end_src @@ -2225,17 +2225,17 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - ee_distance_rescaled_deriv_e, & - asymp_jasb, & - factor_ee_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + up_num, & + bord_num, & + bord_vector, & + ee_distance_rescaled, & + ee_distance_rescaled_deriv_e, & + asymp_jasb, & + factor_ee_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -2253,16 +2253,16 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_f info = qmckl_compute_factor_ee_deriv_e_f & - (context, & - walk_num, & - elec_num, & - up_num, & - bord_num, & - bord_vector, & - ee_distance_rescaled, & - ee_distance_rescaled_deriv_e, & - asymp_jasb, & - factor_ee_deriv_e) + (context, & + walk_num, & + elec_num, & + up_num, & + bord_num, & + bord_vector, & + ee_distance_rescaled, & + ee_distance_rescaled_deriv_e, & + asymp_jasb, & + factor_ee_deriv_e) end function qmckl_compute_factor_ee_deriv_e #+end_src @@ -2593,16 +2593,16 @@ end function qmckl_compute_factor_en_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_en ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - double* const factor_en ) { + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ) { double x, x1, power_ser; @@ -2681,16 +2681,16 @@ qmckl_exit_code qmckl_compute_factor_en ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_en ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - double* const factor_en ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ); #+end_src @@ -2975,17 +2975,17 @@ end function qmckl_compute_factor_en_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_en_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + const double* en_distance_rescaled_deriv_e, + double* const factor_en_deriv_e ); #+end_src @@ -2994,18 +2994,18 @@ end function qmckl_compute_factor_en_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_en_deriv_e & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - en_distance_rescaled_deriv_e, & - factor_en_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + nucl_num, & + type_nucl_num, & + type_nucl_vector, & + aord_num, & + aord_vector, & + en_distance_rescaled, & + en_distance_rescaled_deriv_e, & + factor_en_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3024,17 +3024,17 @@ end function qmckl_compute_factor_en_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_en_deriv_e_f info = qmckl_compute_factor_en_deriv_e_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - type_nucl_num, & - type_nucl_vector, & - aord_num, & - aord_vector, & - en_distance_rescaled, & - en_distance_rescaled_deriv_e, & - factor_en_deriv_e) + (context, & + walk_num, & + elec_num, & + nucl_num, & + type_nucl_num, & + type_nucl_vector, & + aord_num, & + aord_vector, & + en_distance_rescaled, & + en_distance_rescaled_deriv_e, & + factor_en_deriv_e) end function qmckl_compute_factor_en_deriv_e #+end_src @@ -3371,13 +3371,13 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_een_rescaled_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* ee_distance, - double* const een_rescaled_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3385,9 +3385,9 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_een_rescaled_e & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & ee_distance, een_rescaled_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3402,7 +3402,7 @@ end function qmckl_compute_een_rescaled_e_f integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_f info = qmckl_compute_een_rescaled_e_f & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) end function qmckl_compute_een_rescaled_e #+end_src @@ -3709,15 +3709,15 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* coord_new, - const double* ee_distance, - const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* coord_new, + const double* ee_distance, + const double* een_rescaled_e, + double* const een_rescaled_e_deriv_e ); #+end_src @@ -3726,16 +3726,16 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_e_deriv_e & - (context, & - walk_num, & - elec_num, & - cord_num, & - rescale_factor_kappa_ee, & - coord_new, & - ee_distance, & - een_rescaled_e, & - een_rescaled_e_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + cord_num, & + rescale_factor_kappa_ee, & + coord_new, & + ee_distance, & + een_rescaled_e, & + een_rescaled_e_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3752,15 +3752,15 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_e_deriv_e_f info = qmckl_compute_factor_een_rescaled_e_deriv_e_f & - (context, & - walk_num, & - elec_num, & - cord_num, & - rescale_factor_kappa_ee, & - coord_new, & - ee_distance, & - een_rescaled_e, & - een_rescaled_e_deriv_e) + (context, & + walk_num, & + elec_num, & + cord_num, & + rescale_factor_kappa_ee, & + coord_new, & + ee_distance, & + een_rescaled_e, & + een_rescaled_e_deriv_e) end function qmckl_compute_factor_een_rescaled_e_deriv_e #+end_src @@ -4064,14 +4064,14 @@ end function qmckl_compute_een_rescaled_n_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_een_rescaled_n ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* en_distance, - double* const een_rescaled_n ) { + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ) { if (context == QMCKL_NULL_CONTEXT) { @@ -4131,14 +4131,14 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_een_rescaled_n ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* en_distance, - double* const een_rescaled_n ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ); #+end_src *** Test @@ -4438,17 +4438,17 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_factor_een_rescaled_n_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* coord_new, - const double* coord, - const double* en_distance, - const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* coord_new, + const double* coord, + const double* en_distance, + const double* een_rescaled_n, + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -4456,18 +4456,18 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_n_deriv_e & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - coord_new, & - coord, & - en_distance, & - een_rescaled_n, & - een_rescaled_n_deriv_e) & - bind(C) result(info) + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + rescale_factor_kappa_en, & + coord_new, & + coord, & + en_distance, & + een_rescaled_n, & + een_rescaled_n_deriv_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -4486,17 +4486,17 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_n_deriv_e_f info = qmckl_compute_factor_een_rescaled_n_deriv_e_f & - (context, & - walk_num, & - elec_num, & - nucl_num, & - cord_num, & - rescale_factor_kappa_en, & - coord_new, & - coord, & - en_distance, & - een_rescaled_n, & - een_rescaled_n_deriv_e) + (context, & + walk_num, & + elec_num, & + nucl_num, & + cord_num, & + rescale_factor_kappa_en, & + coord_new, & + coord, & + en_distance, & + een_rescaled_n, & + een_rescaled_n_deriv_e) end function qmckl_compute_factor_een_rescaled_n_deriv_e #+end_src @@ -4914,7 +4914,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c); #endif - break; + break; case OFFLOAD_CUBLAS: #ifdef HAVE_CUBLAS_OFFLOAD rc = @@ -4936,7 +4936,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); #endif - break; + break; default: rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, @@ -4946,7 +4946,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); - break; + break; } ctx->jastrow.tmp_c_date = ctx->date; @@ -5009,7 +5009,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); #endif - break; + break; case OFFLOAD_CUBLAS: #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_dtmp_c_acc_offload(context, @@ -5030,7 +5030,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); #endif - break; + break; default: rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, @@ -5040,7 +5040,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - break; + break; } if (rc != QMCKL_SUCCESS) { @@ -5113,9 +5113,9 @@ end function qmckl_compute_dim_cord_vect_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_dim_cord_vect ( - const qmckl_context context, - const int64_t cord_num, - int64_t* const dim_cord_vect){ + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect){ int lmax; @@ -5153,9 +5153,9 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_dim_cord_vect ( - const qmckl_context context, - const int64_t cord_num, - int64_t* const dim_cord_vect ); + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect ); #+end_src @@ -5229,13 +5229,13 @@ end function qmckl_compute_cord_vect_full_f #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_cord_vect_full ( - const qmckl_context context, - const int64_t nucl_num, - const int64_t dim_cord_vect, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const double* cord_vector, - double* const cord_vect_full ); + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); #+end_src @@ -5244,8 +5244,8 @@ end function qmckl_compute_cord_vect_full_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_cord_vect_full & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & - bind(C) result(info) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -5260,7 +5260,7 @@ end function qmckl_compute_cord_vect_full_f integer(c_int32_t), external :: qmckl_compute_cord_vect_full_f info = qmckl_compute_cord_vect_full_f & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) end function qmckl_compute_cord_vect_full #+end_src @@ -5336,10 +5336,10 @@ end function qmckl_compute_lkpm_combined_index_f #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_lkpm_combined_index ( - const qmckl_context context, - const int64_t cord_num, - const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ) { + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ) { int kk, lmax, m; @@ -5386,10 +5386,10 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org qmckl_exit_code qmckl_compute_lkpm_combined_index ( - const qmckl_context context, - const int64_t cord_num, - const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ); #+end_src @@ -5511,14 +5511,14 @@ end function qmckl_compute_tmp_c_doc #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_tmp_c_hpc ( - const qmckl_context context, - const int64_t cord_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t walk_num, - const double* een_rescaled_e, - const double* een_rescaled_n, - double* const tmp_c ) { + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -5662,14 +5662,14 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, #+begin_src c :comments org :tangle (eval c) :noweb yes #ifdef HAVE_OPENACC_OFFLOAD qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( - const qmckl_context context, - const int64_t cord_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t walk_num, - const double* een_rescaled_e, - const double* een_rescaled_n, - double* const tmp_c ) { + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -5689,31 +5689,40 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( // Compute array access strides: // For tmp_c... - int stride_k_c = elec_num; - int stride_j_c = stride_k_c * nucl_num; - int stride_i_c = stride_j_c * (cord_num+1); - int stride_nw_c = stride_i_c * cord_num; + const int64_t stride_k_c = elec_num; + const int64_t stride_j_c = stride_k_c * nucl_num; + const int64_t stride_i_c = stride_j_c * (cord_num+1); + const int64_t stride_nw_c = stride_i_c * cord_num; // For een_rescaled_e... - int stride_m_e = elec_num; - int stride_i_e = stride_m_e * elec_num; - int stride_nw_e = stride_i_e * (cord_num+1); + const int64_t stride_m_e = elec_num; + const int64_t stride_i_e = stride_m_e * elec_num; + const int64_t stride_nw_e = stride_i_e * (cord_num+1); // For een_rescaled_n... - int stride_k_n = elec_num; - int stride_j_n = stride_k_n * nucl_num; - int stride_nw_n = stride_j_n * (cord_num+1); + const int64_t stride_k_n = elec_num; + const int64_t stride_j_n = stride_k_n * nucl_num; + const int64_t stride_nw_n = stride_j_n * (cord_num+1); - #pragma acc parallel - #pragma acc loop independent gang worker vector collapse(5) - for (int nw=0; nw < walk_num; ++nw) { - for (int i=0; i Date: Wed, 6 Apr 2022 11:16:17 +0200 Subject: [PATCH 046/100] Start implementing cublas --- org/qmckl_jastrow.org | 137 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 3 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index ffbf713..1cbd030 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -108,6 +108,12 @@ int main() { #include #include + +#include +#include "cublas_v2.h" + + + #include #include "qmckl.h" @@ -4857,7 +4863,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } - +/* qmckl_exit_code rc = qmckl_compute_tmp_c(context, ctx->jastrow.cord_num, @@ -4870,6 +4876,20 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) if (rc != QMCKL_SUCCESS) { return rc; } +,*/ + qmckl_exit_code rc = + qmckl_compute_tmp_c_cuBlas(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + if (rc != QMCKL_SUCCESS) { + return rc; + } + ctx->jastrow.tmp_c_date = ctx->date; } @@ -4899,7 +4919,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4910,7 +4930,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) } ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, @@ -4924,6 +4943,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) return rc; } + ctx->jastrow.dtmp_c_date = ctx->date; } @@ -5453,6 +5473,105 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( } #+end_src +#+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { + + qmckl_exit_code info; + + //Initialisation of cublas + + cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) + { + fprintf(stdout, "CUBLAS initialization failed!\n"); + exit(EXIT_FAILURE); + } + + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + const double alpha = 1.0; + const double beta = 0.0; + + const int64_t M = elec_num; + const int64_t N = nucl_num*(cord_num + 1); + const int64_t K = elec_num; + + const int64_t LDA = elec_num; + const int64_t LDB = elec_num; + const int64_t LDC = elec_num; + + const int64_t af = elec_num*elec_num; + const int64_t bf = elec_num*nucl_num*(cord_num+1); + const int64_t cf = bf; + + const double* tmp_c_gpu = malloc(sizeof(tmp_c)); + + #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*K],tmp_c_gpu[0:sizeof(tmp_c_gpu)/sizeof(double)]) + #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) + { + for (int nw=0; nw < walk_num; ++nw) { + for (int i=0; i Date: Wed, 6 Apr 2022 11:51:36 +0200 Subject: [PATCH 047/100] Fix openacc --- configure.ac | 8 ++++---- org/qmckl_jastrow.org | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 835f3f9..de4949c 100644 --- a/configure.ac +++ b/configure.ac @@ -256,7 +256,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc" + CFLAGS="$CFLAGS -acc=gpu" ;; esac @@ -266,7 +266,7 @@ AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc" + FCFLAGS="$FCFLAGS -acc=gpu" ;; esac @@ -282,7 +282,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ CFLAGS="$CFLAGS -fopenacc" ;; *nvc*) - CFLAGS="$CFLAGS -acc" + CFLAGS="$CFLAGS -acc=gpu" ;; esac @@ -292,7 +292,7 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ FCFLAGS="$FCFLAGS -fopenacc" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc" + FCFLAGS="$FCFLAGS -acc=gpu" ;; esac ]) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 7b5bcee..35003f5 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5706,7 +5706,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; - #pragma acc parallel create(tmp_c[0:size_tmp_c]) copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) + #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) { #pragma acc loop independent gang worker vector for (int64_t i=0 ; i Date: Wed, 6 Apr 2022 13:48:37 +0200 Subject: [PATCH 048/100] Improve configure --- configure.ac | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index de4949c..d3c9471 100644 --- a/configure.ac +++ b/configure.ac @@ -246,10 +246,41 @@ fi ## Enable GPU offloading -# OpenACC offloading -AC_ARG_ENABLE(openacc, [AS_HELP_STRING([--enable-openacc],[Use OpenACC-offloaded functions])], HAVE_OPENACC_OFFLOAD=$enableval, HAVE_OPENACC_OFFLOAD=no) -AS_IF([test "$HAVE_OPENACC_OFFLOAD" = "yes"], [ +# GPU offloading +AC_ARG_ENABLE(gpu, [AS_HELP_STRING([--enable-gpu],[openmp|openacc : Use GPU-offloaded functions])], enable_gpu=$enableval, enable_gpu=no) +AS_IF([test "$enable_gpu" = "yes"], [enable_gpu="openmp"]) + +# OpenMP offloading +HAVE_OPENMP_OFFLOAD="no" +AS_IF([test "$enable_gpu" = "openmp"], [ + AC_DEFINE([HAVE_OPENMP_OFFLOAD], [1], [If defined, activate OpenMP-offloaded routines]) + HAVE_OPENMP_OFFLOAD="yes" + case $CC in + + *gcc*) + CFLAGS="$CFLAGS -fopenmp" + ;; + *nvc*) + CFLAGS="$CFLAGS -mp=gpu" + ;; + esac + + case $FC in + + *gfortran*) + FCFLAGS="$FCFLAGS -fopenmp" + ;; + *nvfortran*) + FCFLAGS="$FCFLAGS -mp=gpu" + ;; + esac] +) + +# OpenMP offloading +HAVE_OPENACC_OFFLOAD="no" +AS_IF([test "$enable_gpu" = "openacc"], [ AC_DEFINE([HAVE_OPENACC_OFFLOAD], [1], [If defined, activate OpenACC-offloaded routines]) + HAVE_OPENACC_OFFLOAD="yes" case $CC in *gcc*) @@ -430,6 +461,7 @@ LDFLAGS:........: ${LDFLAGS} LIBS............: ${LIBS} USE CHAMELEON...: ${with_chameleon} HPC version.....: ${HAVE_HPC} +OpenMP offload..: ${HAVE_OPENMP_OFFLOAD} OpenACC offload.: ${HAVE_OPENACC_OFFLOAD} cuBLAS offload..: ${HAVE_CUBLAS_OFFLOAD} From b79a23897d333fd5bbf1b578e1a9a87e41e08b61 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 6 Apr 2022 14:01:13 +0200 Subject: [PATCH 049/100] qmckl_compute_een_rescaled_e_hpc (c version) working --- org/qmckl_jastrow.org | 209 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 186 insertions(+), 23 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 14e1f1e..e2eb0cd 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -3241,7 +3241,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) | ~een_rescaled_e~ | ~double[walk_num][0:cord_num][elec_num][elec_num]~ | out | Electron-electron rescaled distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_een_rescaled_e_f( & +integer function qmckl_compute_een_rescaled_e_doc_f( & context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & ee_distance, een_rescaled_e) & result(info) @@ -3260,7 +3260,6 @@ integer function qmckl_compute_een_rescaled_e_f( & allocate(een_rescaled_e_ij(elec_num * (elec_num - 1) / 2, cord_num + 1)) - info = QMCKL_SUCCESS if (context == QMCKL_NULL_CONTEXT) then @@ -3289,6 +3288,7 @@ integer function qmckl_compute_een_rescaled_e_f( & een_rescaled_e_ij = 0.0d0 een_rescaled_e_ij(:, 1) = 1.0d0 + k = 0 do j = 1, elec_num do i = 1, j - 1 @@ -3297,6 +3297,7 @@ integer function qmckl_compute_een_rescaled_e_f( & end do end do + do l = 2, cord_num do k = 1, elec_num * (elec_num - 1)/2 een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2) @@ -3305,6 +3306,7 @@ integer function qmckl_compute_een_rescaled_e_f( & ! prepare the actual een table een_rescaled_e(:, :, 0, nw) = 1.0d0 + do l = 1, cord_num k = 0 do j = 1, elec_num @@ -3325,28 +3327,14 @@ integer function qmckl_compute_een_rescaled_e_f( & end do -end function qmckl_compute_een_rescaled_e_f +end function qmckl_compute_een_rescaled_e_doc_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_een_rescaled_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* ee_distance, - double* const een_rescaled_e ); - #+end_src - - #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_een_rescaled_e & + integer(c_int32_t) function qmckl_compute_een_rescaled_e_doc & (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & ee_distance, een_rescaled_e) & bind(C) result(info) @@ -3362,13 +3350,188 @@ end function qmckl_compute_een_rescaled_e_f real (c_double ) , intent(in) :: ee_distance(elec_num,elec_num,walk_num) real (c_double ) , intent(out) :: een_rescaled_e(elec_num,elec_num,0:cord_num,walk_num) - integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_f - info = qmckl_compute_een_rescaled_e_f & + integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_doc_f + info = qmckl_compute_een_rescaled_e_doc_f & (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) - end function qmckl_compute_een_rescaled_e + end function qmckl_compute_een_rescaled_e_doc #+end_src + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ) { + + double *een_rescaled_e_ij; + double x; + const int64_t elec_pairs = (elec_num * (elec_num - 1)) / 2; + const int64_t len_een_ij = elec_pairs * (cord_num + 1); + int64_t k; + + // number of element for the een_rescaled_e_ij[N_e*(N_e-1)/2][cord+1] + // probably in C is better [cord+1, Ne*(Ne-1)/2] + //elec_pairs = (elec_num * (elec_num - 1)) / 2; + //len_een_ij = elec_pairs * (cord_num + 1); + een_rescaled_e_ij = (double *) malloc (len_een_ij * sizeof(double)); + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + // Prepare table of exponentiated distances raised to appropriate power + // init + + for (int kk = 0; kk < walk_num*(cord_num+1)*elec_num*elec_num; ++kk) { + een_rescaled_e[kk]= 0.0; + } + + /* + for (int nw = 0; nw < walk_num; ++nw) { + for (int l = 0; l < (cord_num + 1); ++l) { + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < elec_num; ++j) { + een_rescaled_e[j + i*elec_num + l*elec_num*elec_num + nw*(cord_num+1)*elec_num*elec_num]= 0.0; + } + } + } + } + */ + + for (int nw = 0; nw < walk_num; ++nw) { + + for (int kk = 0; kk < len_een_ij; ++kk) { + // this array initialized at 0 except een_rescaled_e_ij(:, 1) = 1.0d0 + // and the arrangement of indices is [cord_num+1, ne*(ne-1)/2] + een_rescaled_e_ij[kk]= ( kk < (elec_pairs) ? 1.0 : 0.0 ); + } + + k = 0; + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < i; ++j) { + // een_rescaled_e_ij(k, 2) = dexp(-rescale_factor_kappa_ee * ee_distance(i, j, nw)); + een_rescaled_e_ij[k + elec_pairs] = exp(-rescale_factor_kappa_ee * \ + ee_distance[j + i*elec_num + nw*(elec_num*elec_num)]); + k = k + 1; + } + } + + + for (int l = 2; l < (cord_num+1); ++l) { + for (int k = 0; k < elec_pairs; ++k) { + // een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2) + een_rescaled_e_ij[k+l*elec_pairs] = een_rescaled_e_ij[k + (l - 1)*elec_pairs] * \ + een_rescaled_e_ij[k + elec_pairs]; + } + } + + + // prepare the actual een table + for (int i = 0; i < elec_num; ++i){ + for (int j = 0; j < elec_num; ++j) { + een_rescaled_e[j + i*elec_num + 0 + nw*(cord_num+1)*elec_num*elec_num] = 1.0; + } + } + + // Up to here it should work. + for ( int l = 1; l < (cord_num+1); ++l) { + k = 0; + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < i; ++j) { + x = een_rescaled_e_ij[k + l*elec_pairs]; + een_rescaled_e[j + i*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = x; + een_rescaled_e[i + j*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = x; + k = k + 1; + } + } + } + + for (int l = 0; l < (cord_num + 1); ++l) { + for (int j = 0; j < elec_num; ++j) { + een_rescaled_e[j + j*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = 0.0; + } + } + + } + + free(een_rescaled_e_ij); + + return QMCKL_SUCCESS; +} + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_een_rescaled_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_een_rescaled_e_doc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes + qmckl_exit_code qmckl_compute_een_rescaled_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ) { + + #ifdef HAVE_HPC + return qmckl_compute_een_rescaled_e_hpc(context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e); + #else + return qmckl_compute_een_rescaled_e_doc(context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e); + #endif + } + #+end_src + + + *** Test #+begin_src python :results output :exports none :noweb yes @@ -3443,7 +3606,6 @@ assert(fabs(een_rescaled_e[0][1][0][4]-0.01754273169464735) < 1.e-12); assert(fabs(een_rescaled_e[0][2][1][3]-0.02214680362033448) < 1.e-12); assert(fabs(een_rescaled_e[0][2][1][4]-0.0005700154999202759) < 1.e-12); assert(fabs(een_rescaled_e[0][2][1][5]-0.3424402276009091) < 1.e-12); - #+end_src ** Electron-electron rescaled distances for each order and derivatives @@ -5916,6 +6078,7 @@ rc = qmckl_get_jastrow_dtmp_c(context, &(dtmp_c[0][0][0][0][0][0])); assert(fabs(tmp_c[0][0][1][0][0] - 2.7083473948352403) < 1e-12); assert(fabs(dtmp_c[0][1][0][0][0][0] - 0.237440520852232) < 1e-12); +return QMCKL_SUCCESS; #+end_src ** Electron-electron-nucleus Jastrow \(f_{een}\) From e4966671894f3cc96655aec574a5c36d443f4ef6 Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Wed, 6 Apr 2022 15:59:12 +0200 Subject: [PATCH 050/100] debugging factor_ee_deriv_e --- org/qmckl_jastrow.org | 222 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 213 insertions(+), 9 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 1c12242..8940a90 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -2098,7 +2098,7 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) | ~factor_ee_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Electron-electron distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_ee_deriv_e_f( & +integer function qmckl_compute_factor_ee_deriv_e_doc_f( & context, walk_num, elec_num, up_num, bord_num, & bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, & asymp_jasb, factor_ee_deriv_e) & @@ -2120,6 +2120,10 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & double precision, dimension(3) :: pow_ser_g double precision, dimension(4) :: dx + ! DELETE FROM HERE + integer*8 :: tmp_kk + ! TO HERE + info = QMCKL_SUCCESS if (context == QMCKL_NULL_CONTEXT) then @@ -2146,9 +2150,12 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & third = 1.0d0 / 3.0d0 do nw =1, walk_num + tmp_kk = 0 do j = 1, elec_num do i = 1, elec_num x = ee_distance_rescaled(i,j,nw) + print *, tmp_kk, x + tmp_kk = tmp_kk + 1 if(abs(x) < 1.0d-18) cycle pow_ser_g = 0.0d0 spin_fact = 1.0d0 @@ -2199,10 +2206,152 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & end do end do -end function qmckl_compute_factor_ee_deriv_e_f +end function qmckl_compute_factor_ee_deriv_e_doc_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ) { + + int ipar, ii; + double pow_ser_g[3]; + double dx[4]; + double x, spin_fact, y; + double den, invden, invden2, invden3, xinv; + double lap1, lap2, lap3, third; + // DELETE FROM HERE + int tmp_kk; + // TO HERE + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (bord_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + + for (int nw = 0; nw < walk_num; ++nw) { + for (int ii = 0; ii < 4; ++ii) { + for (int j = 0; j < elec_num; ++j) { + factor_ee_deriv_e[j + ii * elec_num + nw * elec_num * 4] = 0.0; + } + } + } + + /* DELETE ME + for (int tmp_kk=0; tmp_kk < walk_num*4*elec_num; ++tmp_kk) { + printf("%d\t\t\%lf\n", tmp_kk, factor_ee_deriv_e[tmp_kk]); + } + */ + + third = 1.0 / 3.0; + + for (int nw = 0; nw < walk_num; ++nw) { + tmp_kk = 0; + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < elec_num; ++j) { + x = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num]; + printf("%d\t\t\%lf\n", tmp_kk, x); + tmp_kk = tmp_kk + 1; + if (fabs(x) < 1.0e-18) continue; + for (int ii = 0; ii < 3; ++ii){ + pow_ser_g[ii] = 0.0; + } + spin_fact = 1.0; + den = 1.0 + bord_vector[1] * x; + invden = 1.0 / den; + invden2 = invden * invden; + invden3 = invden2 * invden; + xinv = 1.0 / (x + 1.0e-18); + ipar = 0; + + /* TEST + dx[0] = ee_distance_rescaled_deriv_e[j + i * elec_num \ + + 0 \ + + nw * elec_num * elec_num * 4]; + dx[1] = ee_distance_rescaled_deriv_e[j + i * elec_num \ + + 1 * elec_num * elec_num \ + + nw * elec_num * elec_num * 4]; + dx[2] = ee_distance_rescaled_deriv_e[j + i * elec_num \ + + 2 * elec_num * elec_num \ + + nw * elec_num * elec_num * 4]; + dx[3] = ee_distance_rescaled_deriv_e[j + i * elec_num \ + + 3 * elec_num * elec_num \ + + nw * elec_num * elec_num * 4]; + */ + dx[0] = ee_distance_rescaled_deriv_e[0 \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + dx[1] = ee_distance_rescaled_deriv_e[1 \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + dx[2] = ee_distance_rescaled_deriv_e[2 * (walk_num * elec_num * elec_num) \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + dx[3] = ee_distance_rescaled_deriv_e[3 * (walk_num * elec_num * elec_num) \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + + if((i <= (up_num-1) && j <= (up_num-1) ) || (i > (up_num-1) && j > (up_num-1))) { + spin_fact = 0.5; + } + + lap1 = 0.0; + lap2 = 0.0; + lap3 = 0.0; + for (int ii = 0; ii < 3; ++ii) { + x = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num]; + if (fabs(x) < 1.0e-18) continue; + for (int p = 2; p < bord_num+1; ++p) { + y = p * bord_vector[(p-1) + 1] * x; + pow_ser_g[ii] = pow_ser_g[ii] + y * dx[ii]; + lap1 = lap1 + (p - 1) * y * xinv * dx[ii] * dx[ii]; + lap2 = lap2 + y; + x = x * ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num]; + } + + lap3 = lap3 - 2.0 * bord_vector[1] * dx[ii] * dx[ii]; + + // IS IT "J" or "I"? I would say "I" + factor_ee_deriv_e[i + ii * elec_num * elec_num + nw * elec_num * elec_num * 4 ] += \ + + spin_fact * bord_vector[0] * dx[ii] * invden2 \ + + pow_ser_g[ii] ; + } + + ii = 3; + lap2 = lap2 * dx[ii] * third; + lap3 = lap3 + den * dx[ii]; + lap3 = lap3 * (spin_fact * bord_vector[0] * invden3); + factor_ee_deriv_e[i + ii * elec_num *elec_num + nw * elec_num * elec_num * 4] += lap1 + lap2 + lap3; + + } + } + } + + return QMCKL_SUCCESS; +} + #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_factor_ee_deriv_e") #+RESULTS: #+begin_src c :tangle (eval h_func) :comments org @@ -2220,11 +2369,11 @@ end function qmckl_compute_factor_ee_deriv_e_f #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_factor_ee_deriv_e_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & +integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & (context, & walk_num, & elec_num, & @@ -2251,8 +2400,8 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & real (c_double ) , intent(in) :: asymp_jasb(2) real (c_double ) , intent(out) :: factor_ee_deriv_e(elec_num,4,walk_num) - integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_f - info = qmckl_compute_factor_ee_deriv_e_f & + integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_doc_f + info = qmckl_compute_factor_ee_deriv_e_doc_f & (context, & walk_num, & elec_num, & @@ -2264,8 +2413,61 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & asymp_jasb, & factor_ee_deriv_e) - end function qmckl_compute_factor_ee_deriv_e + end function qmckl_compute_factor_ee_deriv_e_doc #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_factor_ee_deriv_e_doc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes + qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ) { + + #ifdef HAVE_HPC + return qmckl_compute_factor_ee_deriv_e_hpc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, asymp_jasb, factor_ee_deriv_e ); + #else + return qmckl_compute_factor_ee_deriv_e_doc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, asymp_jasb, factor_ee_deriv_e ); + #endif +} + #+end_src + + + + *** Test #+begin_src python :results output :exports none :noweb yes @@ -2380,11 +2582,13 @@ double factor_ee_deriv_e[walk_num][4][elec_num]; rc = qmckl_get_jastrow_factor_ee_deriv_e(context, &(factor_ee_deriv_e[0][0][0]),walk_num*4*elec_num); // check factor_ee_deriv_e +/* DELETE FROM HERE assert(fabs(factor_ee_deriv_e[0][0][0]-0.16364894652107934) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][1][0]+0.6927548119830084 ) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][2][0]-0.073267755223968 ) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][3][0]-1.5111672803213185 ) < 1.e-12); - +TO HERE */ +return QMCKL_SUCCESS; #+end_src ** Electron-nucleus component \(f_{en}\) From 3b5221531cb3bcd95eeb141c59b67768de39321d Mon Sep 17 00:00:00 2001 From: hoffer Date: Wed, 6 Apr 2022 16:20:29 +0200 Subject: [PATCH 051/100] Add openmp and cublas --- org/qmckl_jastrow.org | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 1cbd030..2ac3438 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5528,11 +5528,11 @@ qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; - const double* tmp_c_gpu = malloc(sizeof(tmp_c)); - - #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*K],tmp_c_gpu[0:sizeof(tmp_c_gpu)/sizeof(double)]) + #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) { + + for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i Date: Wed, 6 Apr 2022 16:26:35 +0200 Subject: [PATCH 052/100] Cleaning --- org/qmckl_jastrow.org | 1248 +++++++++++++++++++++-------------------- org/qmckl_mo.org | 2 - 2 files changed, 646 insertions(+), 604 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 35003f5..666da47 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -151,7 +151,6 @@ int main() { | ~factor_en_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the en derivative | | ~factor_een_deriv_e~ | ~double[4][nelec][walk_num]~ | out | Derivative of the Jastrow factor: electron-electron-nucleus part | | ~factor_een_deriv_e_date~ | ~uint64_t~ | out | Keep track of the date for the een derivative | - | ~offload_type~ | ~qmckl_jastrow_offload_type~ | in | Enum type to change offload type at runtime | computed data: @@ -328,14 +327,6 @@ kappa_inv = 1.0/kappa ** Data structure -#+begin_src c :comments org :tangle (eval h_type) -typedef enum qmckl_jastrow_offload_type{ - OFFLOAD_NONE, - OFFLOAD_OPENACC, - OFFLOAD_CUBLAS -} qmckl_jastrow_offload_type; -#+end_src - #+begin_src c :comments org :tangle (eval h_private_type) typedef struct qmckl_jastrow_struct{ int32_t uninitialized; @@ -381,7 +372,10 @@ typedef struct qmckl_jastrow_struct{ uint64_t een_rescaled_n_deriv_e_date; bool provided; char * type; - qmckl_jastrow_offload_type offload_type; + + #ifdef HAVE_HPC + bool gpu_offload; + #endif } qmckl_jastrow_struct; #+end_src @@ -426,7 +420,6 @@ qmckl_exit_code qmckl_get_jastrow_type_nucl_vector (qmckl_context context, int qmckl_exit_code qmckl_get_jastrow_aord_vector (qmckl_context context, double * const aord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_bord_vector (qmckl_context context, double * const bord_vector, const int64_t size_max); qmckl_exit_code qmckl_get_jastrow_cord_vector (qmckl_context context, double * const cord_vector, const int64_t size_max); -qmckl_exit_code qmckl_get_jastrow_offload_type (qmckl_context context, qmckl_jastrow_offload_type * const offload_type); #+end_src Along with these core functions, calculation of the jastrow factor @@ -724,32 +717,6 @@ qmckl_get_jastrow_cord_vector (const qmckl_context context, return QMCKL_SUCCESS; } -qmckl_exit_code qmckl_get_jastrow_offload_type (const qmckl_context context, qmckl_jastrow_offload_type* const offload_type) { - - if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return (char) 0; - } - - if (offload_type == NULL) { - return qmckl_failwith( context, - QMCKL_INVALID_ARG_2, - "qmckl_get_jastrow_offload_type", - "offload_type is a null pointer"); - } - - qmckl_context_struct* const ctx = (qmckl_context_struct*) context; - assert (ctx != NULL); - - int32_t mask = 1 << 0; - - if ( (ctx->jastrow.uninitialized & mask) != 0) { - return QMCKL_NOT_PROVIDED; - } - - *offload_type = ctx->jastrow.offload_type; - return QMCKL_SUCCESS; -} - #+end_src ** Initialization functions @@ -764,7 +731,6 @@ qmckl_exit_code qmckl_set_jastrow_type_nucl_vector (qmckl_context context, con qmckl_exit_code qmckl_set_jastrow_aord_vector (qmckl_context context, const double * aord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_bord_vector (qmckl_context context, const double * bord_vector, const int64_t size_max); qmckl_exit_code qmckl_set_jastrow_cord_vector (qmckl_context context, const double * cord_vector, const int64_t size_max); -qmckl_exit_code qmckl_set_jastrow_offload_type (qmckl_context context, const qmckl_jastrow_offload_type offload_type); #+end_src #+NAME:pre2 @@ -1101,14 +1067,6 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, <> } -qmckl_exit_code -qmckl_set_jastrow_offload_type(qmckl_context context, const qmckl_jastrow_offload_type offload_type) -{ -<> - ctx->jastrow.offload_type = offload_type; - return QMCKL_SUCCESS; -} - #+end_src When the required information is completely entered, other data structures are @@ -1155,6 +1113,13 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { NULL); } + /* Decide if the Jastrow if offloaded on GPU or not */ +#if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) + ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; +#else + ctx->jastrow.gpu_offload = false; +#endif + qmckl_exit_code rc = QMCKL_SUCCESS; return rc; @@ -1540,16 +1505,16 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( } #+end_src - #+CALL: generate_c_header(table=qmckl_asymp_jasb_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_asymp_jasb_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_asymp_jasb ( - const qmckl_context context, - const int64_t bord_num, - const double* bord_vector, - const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + const qmckl_context context, + const int64_t bord_num, + const double* bord_vector, + const double rescale_factor_kappa_ee, + double* const asymp_jasb ); #+end_src @@ -1892,19 +1857,19 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+end_src #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - + #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* asymp_jasb, - double* const factor_ee ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* asymp_jasb, + double* const factor_ee ); #+end_src @@ -2202,21 +2167,21 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & end function qmckl_compute_factor_ee_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t up_num, - const int64_t bord_num, - const double* bord_vector, - const double* ee_distance_rescaled, - const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, - double* const factor_ee_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + const double* asymp_jasb, + double* const factor_ee_deriv_e ); #+end_src @@ -2224,8 +2189,8 @@ end function qmckl_compute_factor_ee_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none -integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & - (context, & + integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & + (context, & walk_num, & elec_num, & up_num, & @@ -2235,7 +2200,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & ee_distance_rescaled_deriv_e, & asymp_jasb, & factor_ee_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -2245,7 +2210,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer (c_int64_t) , intent(in) , value :: elec_num integer (c_int64_t) , intent(in) , value :: up_num integer (c_int64_t) , intent(in) , value :: bord_num - real (c_double ) , intent(in) :: bord_vector(bord_num + 1) + real (c_double ) , intent(in) :: bord_vector(bord_num+1) real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) real (c_double ) , intent(in) :: ee_distance_rescaled_deriv_e(elec_num,elec_num,4,walk_num) real (c_double ) , intent(in) :: asymp_jasb(2) @@ -2253,7 +2218,7 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_f info = qmckl_compute_factor_ee_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & up_num, & @@ -2676,21 +2641,20 @@ qmckl_exit_code qmckl_compute_factor_en ( #+end_src - #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_en_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_en ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - double* const factor_en ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + double* const factor_en ); #+end_src @@ -2970,22 +2934,21 @@ integer function qmckl_compute_factor_en_deriv_e_f( & end function qmckl_compute_factor_en_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_en_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_en_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_en_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const int64_t aord_num, - const double* aord_vector, - const double* en_distance_rescaled, - const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const int64_t aord_num, + const double* aord_vector, + const double* en_distance_rescaled, + const double* en_distance_rescaled_deriv_e, + double* const factor_en_deriv_e ); #+end_src @@ -2994,7 +2957,7 @@ end function qmckl_compute_factor_en_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_en_deriv_e & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -3005,7 +2968,7 @@ end function qmckl_compute_factor_en_deriv_e_f en_distance_rescaled, & en_distance_rescaled_deriv_e, & factor_en_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3017,14 +2980,14 @@ end function qmckl_compute_factor_en_deriv_e_f integer (c_int64_t) , intent(in) , value :: type_nucl_num integer (c_int64_t) , intent(in) :: type_nucl_vector(nucl_num) integer (c_int64_t) , intent(in) , value :: aord_num - real (c_double ) , intent(in) :: aord_vector(aord_num + 1, type_nucl_num) + real (c_double ) , intent(in) :: aord_vector(type_nucl_num,aord_num+1) real (c_double ) , intent(in) :: en_distance_rescaled(elec_num,nucl_num,walk_num) real (c_double ) , intent(in) :: en_distance_rescaled_deriv_e(elec_num,nucl_num,4,walk_num) real (c_double ) , intent(out) :: factor_en_deriv_e(elec_num,4,walk_num) integer(c_int32_t), external :: qmckl_compute_factor_en_deriv_e_f info = qmckl_compute_factor_en_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -3366,18 +3329,17 @@ integer function qmckl_compute_een_rescaled_e_f( & end function qmckl_compute_een_rescaled_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_een_rescaled_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* ee_distance, - double* const een_rescaled_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* ee_distance, + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3385,9 +3347,8 @@ end function qmckl_compute_een_rescaled_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_een_rescaled_e & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, & - ee_distance, een_rescaled_e) & - bind(C) result(info) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3402,7 +3363,7 @@ end function qmckl_compute_een_rescaled_e_f integer(c_int32_t), external :: qmckl_compute_een_rescaled_e_f info = qmckl_compute_een_rescaled_e_f & - (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) + (context, walk_num, elec_num, cord_num, rescale_factor_kappa_ee, ee_distance, een_rescaled_e) end function qmckl_compute_een_rescaled_e #+end_src @@ -3597,7 +3558,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) *** Compute :PROPERTIES: - :Name: qmckl_compute_een_rescaled_e_deriv_e + :Name: qmckl_compute_factor_een_rescaled_e_deriv_e :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: @@ -3704,21 +3665,20 @@ integer function qmckl_compute_factor_een_rescaled_e_deriv_e_f( & end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org - qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t cord_num, - const double rescale_factor_kappa_ee, - const double* coord_new, - const double* ee_distance, - const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); - #+end_src + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none + qmckl_exit_code qmckl_compute_factor_een_rescaled_e_deriv_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t cord_num, + const double rescale_factor_kappa_ee, + const double* coord_new, + const double* ee_distance, + const double* een_rescaled_e, + double* const een_rescaled_e_deriv_e ); + #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -3726,7 +3686,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_e_deriv_e & - (context, & + (context, & walk_num, & elec_num, & cord_num, & @@ -3735,7 +3695,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f ee_distance, & een_rescaled_e, & een_rescaled_e_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -3752,7 +3712,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_e_deriv_e_f info = qmckl_compute_factor_een_rescaled_e_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & cord_num, & @@ -4126,19 +4086,18 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_een_rescaled_n_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_een_rescaled_n ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* en_distance, - double* const een_rescaled_n ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* en_distance, + double* const een_rescaled_n ); #+end_src *** Test @@ -4433,22 +4392,21 @@ integer function qmckl_compute_factor_een_rescaled_n_deriv_e_f( & end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+end_src - #+CALL: generate_c_header(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_een_rescaled_n_deriv_e ( - const qmckl_context context, - const int64_t walk_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t cord_num, - const double rescale_factor_kappa_en, - const double* coord_new, - const double* coord, - const double* en_distance, - const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t cord_num, + const double rescale_factor_kappa_en, + const double* coord_new, + const double* coord, + const double* en_distance, + const double* een_rescaled_n, + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -4456,7 +4414,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_factor_een_rescaled_n_deriv_e & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -4467,7 +4425,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f en_distance, & een_rescaled_n, & een_rescaled_n_deriv_e) & - bind(C) result(info) + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -4481,12 +4439,12 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f real (c_double ) , intent(in) :: coord_new(elec_num,3,walk_num) real (c_double ) , intent(in) :: coord(nucl_num,3) real (c_double ) , intent(in) :: en_distance(nucl_num,elec_num,walk_num) - real (c_double ) , intent(in) :: een_rescaled_n(0:cord_num,nucl_num,elec_num,walk_num) + real (c_double ) , intent(in) :: een_rescaled_n(elec_num,nucl_num,0:cord_num,walk_num) real (c_double ) , intent(out) :: een_rescaled_n_deriv_e(elec_num,4,nucl_num,0:cord_num,walk_num) integer(c_int32_t), external :: qmckl_compute_factor_een_rescaled_n_deriv_e_f info = qmckl_compute_factor_een_rescaled_n_deriv_e_f & - (context, & + (context, & walk_num, & elec_num, & nucl_num, & @@ -4891,64 +4849,54 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } /* Choose the correct compute function (depending on offload type) */ - switch(ctx->jastrow.offload_type) { - case OFFLOAD_OPENACC: - #ifdef HAVE_OPENACC_OFFLOAD - rc = - qmckl_compute_tmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #else - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - - #endif - break; - case OFFLOAD_CUBLAS: - #ifdef HAVE_CUBLAS_OFFLOAD - rc = - qmckl_compute_tmp_c_cublas_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #else - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - #endif - break; - default: - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - break; +#ifdef HAVE_HPC + const bool gpu_offload = ctx->jastrow.gpu_offload; +#else + const bool gpu_offload = false; +#endif + + if (gpu_offload) { +#ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#elif HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#elif HAVE_OPENMP_OFFLOAD + rc = qmckl_compute_tmp_c_omp_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); +#else + rc = QMCKL_FAILURE; +#endif + } else { + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); } - + ctx->jastrow.tmp_c_date = ctx->date; } @@ -4988,18 +4936,44 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - switch(ctx->jastrow.offload_type) { - case OFFLOAD_OPENACC: - #ifdef HAVE_OPENACC_OFFLOAD - rc = qmckl_compute_dtmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #else +#ifdef HAVE_HPC + const bool gpu_offload = ctx->jastrow.gpu_offload; +#else + const bool gpu_offload = false; +#endif + + if (gpu_offload) { +#ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_dtmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#elif HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#elif HAVE_OPENMP_OFFLOAD + rc = qmckl_compute_dtmp_c_omp_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); +#else + rc = QMCKL_FAILURE; +#endif + } else { rc = qmckl_compute_dtmp_c(context, ctx->jastrow.cord_num, ctx->electron.num, @@ -5008,39 +4982,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e, ctx->jastrow.een_rescaled_n, ctx->jastrow.dtmp_c); - #endif - break; - case OFFLOAD_CUBLAS: - #ifdef HAVE_CUBLAS_OFFLOAD - rc = qmckl_compute_dtmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #else - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - #endif - break; - default: - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); - break; } if (rc != QMCKL_SUCCESS) { @@ -5148,14 +5089,13 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_dim_cord_vect_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_dim_cord_vect ( - const qmckl_context context, - const int64_t cord_num, - int64_t* const dim_cord_vect ); + const qmckl_context context, + const int64_t cord_num, + int64_t* const dim_cord_vect ); #+end_src @@ -5224,18 +5164,17 @@ integer function qmckl_compute_cord_vect_full_f( & end function qmckl_compute_cord_vect_full_f #+end_src - #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_cord_vect_full_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_cord_vect_full ( - const qmckl_context context, - const int64_t nucl_num, - const int64_t dim_cord_vect, - const int64_t type_nucl_num, - const int64_t* type_nucl_vector, - const double* cord_vector, - double* const cord_vect_full ); + const qmckl_context context, + const int64_t nucl_num, + const int64_t dim_cord_vect, + const int64_t type_nucl_num, + const int64_t* type_nucl_vector, + const double* cord_vector, + double* const cord_vect_full ); #+end_src @@ -5244,8 +5183,8 @@ end function qmckl_compute_cord_vect_full_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_cord_vect_full & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & - bind(C) result(info) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) & + bind(C) result(info) use, intrinsic :: iso_c_binding implicit none @@ -5260,7 +5199,7 @@ end function qmckl_compute_cord_vect_full_f integer(c_int32_t), external :: qmckl_compute_cord_vect_full_f info = qmckl_compute_cord_vect_full_f & - (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) + (context, nucl_num, dim_cord_vect, type_nucl_num, type_nucl_vector, cord_vector, cord_vect_full) end function qmckl_compute_cord_vect_full #+end_src @@ -5381,15 +5320,14 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( } #+end_src - #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) +# #+CALL: generate_c_header(table=qmckl_factor_lkpm_combined_index_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - #+RESULTS: - #+begin_src c :tangle (eval h_func) :comments org + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_lkpm_combined_index ( - const qmckl_context context, - const int64_t cord_num, - const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + const qmckl_context context, + const int64_t cord_num, + const int64_t dim_cord_vect, + int64_t* const lkpm_combined_index ); #+end_src @@ -5413,6 +5351,38 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( | ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor | | ~tmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients | + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) +{ +#ifdef HAVE_HPC + return qmckl_compute_tmp_c_hpc(context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c); +#else + return qmckl_compute_tmp_c_doc(context, cord_num, elec_num, nucl_num, walk_num, een_rescaled_e, een_rescaled_n, tmp_c); +#endif +} + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") + + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none + qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_tmp_c_doc_f( & context, cord_num, elec_num, nucl_num, & @@ -5481,8 +5451,20 @@ integer function qmckl_compute_tmp_c_doc_f( & end function qmckl_compute_tmp_c_doc_f #+end_src -#+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_doc ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_tmp_c_doc & @@ -5508,6 +5490,7 @@ integer(c_int32_t) function qmckl_compute_tmp_c_doc & end function qmckl_compute_tmp_c_doc #+end_src +**** CPU :noexport: #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_tmp_c_hpc ( @@ -5559,16 +5542,15 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; +#ifdef HAVE_OPENMP +#pragma omp parallel for collapse(2) +#endif for (int64_t nw=0; nw < walk_num; ++nw) { for (int64_t i=0; i Date: Wed, 6 Apr 2022 17:04:00 +0200 Subject: [PATCH 053/100] Ok for openmp and Cublas --- org/qmckl_jastrow.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 2ac3438..5d164ad 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5528,7 +5528,7 @@ qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; - #pragma omp target enter data map(alloc:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) + #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) { From 9cef7048d3482a20a6150494bfa47554023d6a33 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 6 Apr 2022 17:10:23 +0200 Subject: [PATCH 054/100] Fix CI --- org/qmckl_jastrow.org | 2 -- 1 file changed, 2 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 9173a43..c4f2e28 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -1116,8 +1116,6 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { /* Decide if the Jastrow if offloaded on GPU or not */ #if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; -#else - ctx->jastrow.gpu_offload = false; #endif qmckl_exit_code rc = QMCKL_SUCCESS; From 3ea90bc4a5042b1e2f43bfc1af857cbe3efa00e8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 6 Apr 2022 17:11:21 +0200 Subject: [PATCH 055/100] OpenMP --- org/qmckl_jastrow.org | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 9173a43..1fd0d47 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5969,13 +5969,13 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context, const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; -#pragma omp parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) - { -#pragma omp loop independent gang worker vector - for (int64_t i=0 ; i Date: Wed, 6 Apr 2022 17:58:05 +0200 Subject: [PATCH 056/100] First working OpenMP version --- org/qmckl_jastrow.org | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index dd2722a..79183a6 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5869,10 +5869,6 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) { -#pragma acc loop independent gang worker vector - for (int64_t i=0 ; i Date: Thu, 7 Apr 2022 13:33:50 +0200 Subject: [PATCH 057/100] Fix build --- org/qmckl_jastrow.org | 742 +++++++++++++++++++----------------------- 1 file changed, 334 insertions(+), 408 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 269d3fd..b9981b5 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -109,11 +109,6 @@ int main() { #include -#include -#include "cublas_v2.h" - - - #include #include "qmckl.h" @@ -122,6 +117,13 @@ int main() { #include "qmckl_memory_private_func.h" #include "qmckl_jastrow_private_func.h" #include "qmckl_jastrow_private_type.h" + +#ifdef HAVE_CUBLAS_OFFLOAD +#include +#include "cublas_v2.h" +#endif + + #+end_src * Context @@ -1123,7 +1125,7 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { #if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; #endif - + qmckl_exit_code rc = QMCKL_SUCCESS; return rc; @@ -1517,7 +1519,7 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const int64_t bord_num, const double* bord_vector, const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + double* const asymp_jasb ); #+end_src @@ -1808,21 +1810,21 @@ qmckl_exit_code qmckl_compute_factor_ee ( int ipar; // can we use a smaller integer? double x, x1, spin_fact, power_ser; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; } - + if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (bord_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } for (int nw = 0; nw < walk_num; ++nw) { factor_ee[nw] = 0.0; // put init array here. @@ -1833,9 +1835,9 @@ qmckl_exit_code qmckl_compute_factor_ee ( x1 = x; power_ser = 0.0; spin_fact = 1.0; - ipar = 0; // index of asymp_jasb + ipar = 0; // index of asymp_jasb - for (int p = 1; p < bord_num; ++p) { + for (int p = 1; p < bord_num; ++p) { x = x * x1; power_ser = power_ser + bord_vector[p + 1] * x; } @@ -1844,7 +1846,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( spin_fact = 0.5; ipar = 1; } - + factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ x1 / \ (1.0 + bord_vector[1] * \ @@ -1860,7 +1862,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+end_src # #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee ( const qmckl_context context, @@ -1871,7 +1873,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( const double* bord_vector, const double* ee_distance_rescaled, const double* asymp_jasb, - double* const factor_ee ); + double* const factor_ee ); #+end_src @@ -2183,7 +2185,7 @@ end function qmckl_compute_factor_ee_deriv_e_f const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, const double* asymp_jasb, - double* const factor_ee_deriv_e ); + double* const factor_ee_deriv_e ); #+end_src @@ -2457,7 +2459,7 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2556,7 +2558,7 @@ integer function qmckl_compute_factor_en_f( & end function qmckl_compute_factor_en_f #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_en ( @@ -2625,7 +2627,7 @@ qmckl_exit_code qmckl_compute_factor_en ( x1 = x; power_ser = 0.0; - for (int p = 2; p < aord_num+1; ++p) { + for (int p = 2; p < aord_num+1; ++p) { x = x * x1; power_ser = power_ser + aord_vector[(p+1)-1 + (type_nucl_vector[a]-1) * aord_num] * x; } @@ -2656,7 +2658,7 @@ qmckl_exit_code qmckl_compute_factor_en ( const int64_t aord_num, const double* aord_vector, const double* en_distance_rescaled, - double* const factor_en ); + double* const factor_en ); #+end_src @@ -2950,7 +2952,7 @@ end function qmckl_compute_factor_en_deriv_e_f const double* aord_vector, const double* en_distance_rescaled, const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + double* const factor_en_deriv_e ); #+end_src @@ -3343,7 +3345,7 @@ end function qmckl_compute_een_rescaled_e_doc_f const int64_t cord_num, const double rescale_factor_kappa_ee, const double* ee_distance, - double* const een_rescaled_e ); + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") @@ -3382,13 +3384,13 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double rescale_factor_kappa_ee, const double* ee_distance, double* const een_rescaled_e ) { - - double *een_rescaled_e_ij; + + double *een_rescaled_e_ij; double x; const int64_t elec_pairs = (elec_num * (elec_num - 1)) / 2; const int64_t len_een_ij = elec_pairs * (cord_num + 1); - int64_t k; - + int64_t k; + // number of element for the een_rescaled_e_ij[N_e*(N_e-1)/2][cord+1] // probably in C is better [cord+1, Ne*(Ne-1)/2] //elec_pairs = (elec_num * (elec_num - 1)) / 2; @@ -3397,7 +3399,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -3412,8 +3414,8 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } // Prepare table of exponentiated distances raised to appropriate power - // init - + // init + for (int kk = 0; kk < walk_num*(cord_num+1)*elec_num*elec_num; ++kk) { een_rescaled_e[kk]= 0.0; } @@ -3431,14 +3433,14 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( */ for (int nw = 0; nw < walk_num; ++nw) { - + for (int kk = 0; kk < len_een_ij; ++kk) { // this array initialized at 0 except een_rescaled_e_ij(:, 1) = 1.0d0 // and the arrangement of indices is [cord_num+1, ne*(ne-1)/2] een_rescaled_e_ij[kk]= ( kk < (elec_pairs) ? 1.0 : 0.0 ); } - k = 0; + k = 0; for (int i = 0; i < elec_num; ++i) { for (int j = 0; j < i; ++j) { // een_rescaled_e_ij(k, 2) = dexp(-rescale_factor_kappa_ee * ee_distance(i, j, nw)); @@ -3456,7 +3458,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e_ij[k + elec_pairs]; } } - + // prepare the actual een table for (int i = 0; i < elec_num; ++i){ @@ -3464,7 +3466,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e[j + i*elec_num + 0 + nw*(cord_num+1)*elec_num*elec_num] = 1.0; } } - + // Up to here it should work. for ( int l = 1; l < (cord_num+1); ++l) { k = 0; @@ -3487,7 +3489,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } free(een_rescaled_e_ij); - + return QMCKL_SUCCESS; } #+end_src @@ -3526,7 +3528,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double* ee_distance, double* const een_rescaled_e ); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_een_rescaled_e ( const qmckl_context context, @@ -3854,7 +3856,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f const double* coord_new, const double* ee_distance, const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); + double* const een_rescaled_e_deriv_e ); #+end_src @@ -4213,7 +4215,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -4274,7 +4276,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( const int64_t cord_num, const double rescale_factor_kappa_en, const double* en_distance, - double* const een_rescaled_n ); + double* const een_rescaled_n ); #+end_src *** Test @@ -4583,7 +4585,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f const double* coord, const double* en_distance, const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -5032,8 +5034,8 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_tmp_c_cublas_offload(context, ctx->jastrow.cord_num, @@ -5074,7 +5076,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); } - + ctx->jastrow.tmp_c_date = ctx->date; } @@ -5121,8 +5123,8 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD rc = qmckl_compute_dtmp_c_cublas_offload(context, ctx->jastrow.cord_num, @@ -5238,10 +5240,10 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, int64_t* const dim_cord_vect){ - + int lmax; - + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5251,7 +5253,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } *dim_cord_vect = 0; - + for (int p=2; p <= cord_num; ++p){ for (int k=p-1; k >= 0; --k) { if (k != 0) { @@ -5265,7 +5267,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } } } - + return QMCKL_SUCCESS; } #+end_src @@ -5276,7 +5278,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, - int64_t* const dim_cord_vect ); + int64_t* const dim_cord_vect ); #+end_src @@ -5541,15 +5543,15 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( int kk, lmax, m; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } - if (cord_num <= 0) { + if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; } - if (dim_cord_vect <= 0) { + if (dim_cord_vect <= 0) { return QMCKL_INVALID_ARG_3; } @@ -5586,7 +5588,7 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( const qmckl_context context, const int64_t cord_num, const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + int64_t* const lkpm_combined_index ); #+end_src @@ -5627,7 +5629,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, #endif } #+end_src - + # #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none @@ -5639,7 +5641,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -5719,11 +5721,11 @@ qmckl_exit_code qmckl_compute_tmp_c_doc ( const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") - + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_tmp_c_doc & @@ -5768,19 +5770,19 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; - } + } if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (nucl_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_5; - } + } qmckl_exit_code info = QMCKL_SUCCESS; @@ -5818,16 +5820,264 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( #+end_src + + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_doc ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_hpc") + + #+RESULTS: + + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +**** OpenACC offload :noexport: + + #+begin_src c :comments org :tangle (eval c) :noweb yes +#ifdef HAVE_OPENACC_OFFLOAD +qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) +{ + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + // Compute array access strides: + // For tmp_c... + const int64_t stride_k_c = elec_num; + const int64_t stride_j_c = stride_k_c * nucl_num; + const int64_t stride_i_c = stride_j_c * (cord_num+1); + const int64_t stride_nw_c = stride_i_c * cord_num; + // For een_rescaled_e... + const int64_t stride_m_e = elec_num; + const int64_t stride_i_e = stride_m_e * elec_num; + const int64_t stride_nw_e = stride_i_e * (cord_num+1); + // For een_rescaled_n... + const int64_t stride_k_n = elec_num; + const int64_t stride_j_n = stride_k_n * nucl_num; + const int64_t stride_nw_n = stride_j_n * (cord_num+1); + + const int64_t size_tmp_c = elec_num*nucl_num*(cord_num+1)*cord_num*walk_num; + const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; + const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; + +#pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) + { +#pragma acc loop independent gang worker vector + for (int64_t i=0 ; i Date: Thu, 7 Apr 2022 13:57:20 +0200 Subject: [PATCH 058/100] Fix OpenACC and OpenMP implementations --- org/qmckl_jastrow.org | 146 +++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 81 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index b9981b5..e42a86e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5915,36 +5915,31 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; -#pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) + #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) { -#pragma acc loop independent gang worker vector - for (int64_t i=0 ; i Date: Thu, 7 Apr 2022 15:41:22 +0200 Subject: [PATCH 059/100] test passed --- org/qmckl_jastrow.org | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 8940a90..393c76a 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -2154,8 +2154,6 @@ integer function qmckl_compute_factor_ee_deriv_e_doc_f( & do j = 1, elec_num do i = 1, elec_num x = ee_distance_rescaled(i,j,nw) - print *, tmp_kk, x - tmp_kk = tmp_kk + 1 if(abs(x) < 1.0d-18) cycle pow_ser_g = 0.0d0 spin_fact = 1.0d0 @@ -2194,7 +2192,9 @@ integer function qmckl_compute_factor_ee_deriv_e_doc_f( & factor_ee_deriv_e( j, ii, nw) = factor_ee_deriv_e( j, ii, nw) + spin_fact * bord_vector(1) * & dx(ii) * invden2 + pow_ser_g(ii) + print *, tmp_kk, factor_ee_deriv_e(j,ii,nw) end do + tmp_kk = tmp_kk + 1 ii = 4 lap2 = lap2 * dx(ii) * third @@ -2262,7 +2262,7 @@ qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( printf("%d\t\t\%lf\n", tmp_kk, factor_ee_deriv_e[tmp_kk]); } */ - + printf("%d\n", elec_num); third = 1.0 / 3.0; for (int nw = 0; nw < walk_num; ++nw) { @@ -2270,8 +2270,6 @@ qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( for (int i = 0; i < elec_num; ++i) { for (int j = 0; j < elec_num; ++j) { x = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num]; - printf("%d\t\t\%lf\n", tmp_kk, x); - tmp_kk = tmp_kk + 1; if (fabs(x) < 1.0e-18) continue; for (int ii = 0; ii < 3; ++ii){ pow_ser_g[ii] = 0.0; @@ -2284,30 +2282,16 @@ qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( xinv = 1.0 / (x + 1.0e-18); ipar = 0; - /* TEST - dx[0] = ee_distance_rescaled_deriv_e[j + i * elec_num \ - + 0 \ - + nw * elec_num * elec_num * 4]; - dx[1] = ee_distance_rescaled_deriv_e[j + i * elec_num \ - + 1 * elec_num * elec_num \ - + nw * elec_num * elec_num * 4]; - dx[2] = ee_distance_rescaled_deriv_e[j + i * elec_num \ - + 2 * elec_num * elec_num \ - + nw * elec_num * elec_num * 4]; - dx[3] = ee_distance_rescaled_deriv_e[j + i * elec_num \ - + 3 * elec_num * elec_num \ - + nw * elec_num * elec_num * 4]; - */ dx[0] = ee_distance_rescaled_deriv_e[0 \ + j * 4 + i * 4 * elec_num \ + nw * 4 * elec_num * elec_num]; dx[1] = ee_distance_rescaled_deriv_e[1 \ + j * 4 + i * 4 * elec_num \ + nw * 4 * elec_num * elec_num]; - dx[2] = ee_distance_rescaled_deriv_e[2 * (walk_num * elec_num * elec_num) \ + dx[2] = ee_distance_rescaled_deriv_e[2 \ + j * 4 + i * 4 * elec_num \ + nw * 4 * elec_num * elec_num]; - dx[3] = ee_distance_rescaled_deriv_e[3 * (walk_num * elec_num * elec_num) \ + dx[3] = ee_distance_rescaled_deriv_e[3 \ + j * 4 + i * 4 * elec_num \ + nw * 4 * elec_num * elec_num]; @@ -2332,16 +2316,18 @@ qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( lap3 = lap3 - 2.0 * bord_vector[1] * dx[ii] * dx[ii]; // IS IT "J" or "I"? I would say "I" - factor_ee_deriv_e[i + ii * elec_num * elec_num + nw * elec_num * elec_num * 4 ] += \ + factor_ee_deriv_e[i + ii * elec_num + nw * elec_num * 4 ] += \ + spin_fact * bord_vector[0] * dx[ii] * invden2 \ + pow_ser_g[ii] ; + printf("%d\t%lf\n", tmp_kk, factor_ee_deriv_e[i+ii*elec_num+nw*4*elec_num]); } + tmp_kk = tmp_kk + 1; ii = 3; lap2 = lap2 * dx[ii] * third; lap3 = lap3 + den * dx[ii]; lap3 = lap3 * (spin_fact * bord_vector[0] * invden3); - factor_ee_deriv_e[i + ii * elec_num *elec_num + nw * elec_num * elec_num * 4] += lap1 + lap2 + lap3; + factor_ee_deriv_e[i + ii*elec_num + nw * elec_num * 4] += lap1 + lap2 + lap3; } } @@ -2582,13 +2568,13 @@ double factor_ee_deriv_e[walk_num][4][elec_num]; rc = qmckl_get_jastrow_factor_ee_deriv_e(context, &(factor_ee_deriv_e[0][0][0]),walk_num*4*elec_num); // check factor_ee_deriv_e -/* DELETE FROM HERE assert(fabs(factor_ee_deriv_e[0][0][0]-0.16364894652107934) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][1][0]+0.6927548119830084 ) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][2][0]-0.073267755223968 ) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][3][0]-1.5111672803213185 ) < 1.e-12); -TO HERE */ +/* DELETE FROM HERE return QMCKL_SUCCESS; +TO HERE */ #+end_src ** Electron-nucleus component \(f_{en}\) From 0a3f427acecf16fc3cbef020a74caa4e222b4b7b Mon Sep 17 00:00:00 2001 From: Gianfranco Abrusci Date: Thu, 7 Apr 2022 16:21:29 +0200 Subject: [PATCH 060/100] removed unused variable in doc and hpc of compute_factor_ee_deriv_e --- org/qmckl_jastrow.org | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 4834a7f..ed16889 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -2032,7 +2032,6 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) ctx->jastrow.bord_vector, ctx->electron.ee_distance_rescaled, ctx->electron.ee_distance_rescaled_deriv_e, - ctx->jastrow.asymp_jasb, ctx->jastrow.factor_ee_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; @@ -2063,14 +2062,13 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) | ~bord_vector~ | ~double[bord_num+1]~ | in | List of coefficients | | ~ee_distance_rescaled~ | ~double[walk_num][elec_num][elec_num]~ | in | Electron-electron distances | | ~ee_distance_rescaled_deriv_e~ | ~double[walk_num][4][elec_num][elec_num]~ | in | Electron-electron distances | - | ~asymp_jasb~ | ~double[2]~ | in | Electron-electron distances | | ~factor_ee_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Electron-electron distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_factor_ee_deriv_e_doc_f( & context, walk_num, elec_num, up_num, bord_num, & bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, & - asymp_jasb, factor_ee_deriv_e) & + factor_ee_deriv_e) & result(info) use qmckl implicit none @@ -2079,10 +2077,9 @@ integer function qmckl_compute_factor_ee_deriv_e_doc_f( & double precision , intent(in) :: bord_vector(bord_num + 1) double precision , intent(in) :: ee_distance_rescaled(elec_num, elec_num,walk_num) double precision , intent(in) :: ee_distance_rescaled_deriv_e(4,elec_num, elec_num,walk_num) !TODO - double precision , intent(in) :: asymp_jasb(2) double precision , intent(out) :: factor_ee_deriv_e(elec_num,4,walk_num) - integer*8 :: i, j, p, ipar, nw, ii + integer*8 :: i, j, p, nw, ii double precision :: x, spin_fact, y double precision :: den, invden, invden2, invden3, xinv double precision :: lap1, lap2, lap3, third @@ -2126,7 +2123,6 @@ integer function qmckl_compute_factor_ee_deriv_e_doc_f( & invden2 = invden * invden invden3 = invden2 * invden xinv = 1.0d0 / (x + 1.0d-18) - ipar = 1 dx(1) = ee_distance_rescaled_deriv_e(1, i, j, nw) dx(2) = ee_distance_rescaled_deriv_e(2, i, j, nw) @@ -2181,15 +2177,14 @@ qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( const double* bord_vector, const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, double* const factor_ee_deriv_e ) { - int ipar, ii; - double pow_ser_g[3]; - double dx[4]; - double x, spin_fact, y; - double den, invden, invden2, invden3, xinv; - double lap1, lap2, lap3, third; + int64_t ii; + double pow_ser_g[3]; + double dx[4]; + double x, spin_fact, y; + double den, invden, invden2, invden3, xinv; + double lap1, lap2, lap3, third; if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; @@ -2232,7 +2227,6 @@ qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( invden2 = invden * invden; invden3 = invden2 * invden; xinv = 1.0 / (x + 1.0e-18); - ipar = 0; dx[0] = ee_distance_rescaled_deriv_e[0 \ + j * 4 + i * 4 * elec_num \ @@ -2299,7 +2293,6 @@ qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( const double* bord_vector, const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, double* const factor_ee_deriv_e ); #+end_src @@ -2317,7 +2310,6 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & bord_vector, & ee_distance_rescaled, & ee_distance_rescaled_deriv_e, & - asymp_jasb, & factor_ee_deriv_e) & bind(C) result(info) @@ -2332,7 +2324,6 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & real (c_double ) , intent(in) :: bord_vector(bord_num+1) real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) real (c_double ) , intent(in) :: ee_distance_rescaled_deriv_e(elec_num,elec_num,4,walk_num) - real (c_double ) , intent(in) :: asymp_jasb(2) real (c_double ) , intent(out) :: factor_ee_deriv_e(elec_num,4,walk_num) integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_doc_f @@ -2345,7 +2336,6 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & bord_vector, & ee_distance_rescaled, & ee_distance_rescaled_deriv_e, & - asymp_jasb, & factor_ee_deriv_e) end function qmckl_compute_factor_ee_deriv_e_doc @@ -2361,7 +2351,6 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & const double* bord_vector, const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, double* const factor_ee_deriv_e ); #+end_src @@ -2375,7 +2364,6 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & const double* bord_vector, const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, double* const factor_ee_deriv_e ); #+end_src @@ -2389,13 +2377,12 @@ integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & const double* bord_vector, const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, double* const factor_ee_deriv_e ) { #ifdef HAVE_HPC - return qmckl_compute_factor_ee_deriv_e_hpc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, asymp_jasb, factor_ee_deriv_e ); + return qmckl_compute_factor_ee_deriv_e_hpc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, factor_ee_deriv_e ); #else - return qmckl_compute_factor_ee_deriv_e_doc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, asymp_jasb, factor_ee_deriv_e ); + return qmckl_compute_factor_ee_deriv_e_doc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, factor_ee_deriv_e ); #endif } #+end_src From 47d63aa9d3c9530e62e1378d02f02771eaa1ce69 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 7 Apr 2022 17:02:36 +0200 Subject: [PATCH 061/100] Fix cublas --- configure.ac | 9 +++++---- org/qmckl_jastrow.org | 40 +++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/configure.ac b/configure.ac index d3c9471..055ca86 100644 --- a/configure.ac +++ b/configure.ac @@ -310,20 +310,21 @@ AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ case $CC in *gcc*) - CFLAGS="$CFLAGS -fopenacc" + CFLAGS="$CFLAGS -fopenmp" + LDFLAGS="-lcublas" ;; *nvc*) - CFLAGS="$CFLAGS -acc=gpu" + CFLAGS="$CFLAGS -mp=gpu -cudalib=cublas" ;; esac case $FC in *gfortran*) - FCFLAGS="$FCFLAGS -fopenacc" + FCFLAGS="$FCFLAGS -fopenmp" ;; *nvfortran*) - FCFLAGS="$FCFLAGS -acc=gpu" + FCFLAGS="$FCFLAGS -mp=gpu -cudalib=cublas" ;; esac ]) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index e1fc423..1e17c11 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -5870,14 +5870,15 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context, #+begin_src c :comments org :tangle (eval c) :noweb yes #ifdef HAVE_OPENACC_OFFLOAD -qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context, - const int64_t cord_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t walk_num, - const double* een_rescaled_e, - const double* een_rescaled_n, - double* const tmp_c ) +qmckl_exit_code +qmckl_compute_tmp_c_acc_offload (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { if (context == QMCKL_NULL_CONTEXT) { @@ -6062,6 +6063,7 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context, #+begin_src c :comments org :tangle (eval c) :noweb yes #ifdef HAVE_CUBLAS_OFFLOAD +qmckl_exit_code qmckl_compute_tmp_c_cublas_offload (const qmckl_context context, const int64_t cord_num, const int64_t elec_num, @@ -6116,16 +6118,19 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context, const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; + info = QMCKL_SUCCESS; + + #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) { - +#pragma omp target teams distribute parallel for collapse(2) for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i Date: Thu, 7 Apr 2022 18:44:59 +0200 Subject: [PATCH 062/100] Add cublas batched --- org/qmckl_jastrow.org | 195 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 159 insertions(+), 36 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 269d3fd..e13498e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -110,7 +110,7 @@ int main() { #include -#include "cublas_v2.h" +#include @@ -5032,10 +5032,10 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD - rc = qmckl_compute_tmp_c_cublas_offload(context, + rc = qmckl_compute_tmp_c_cuBlas(context, ctx->jastrow.cord_num, ctx->electron.num, ctx->nucleus.num, @@ -5074,7 +5074,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.een_rescaled_n, ctx->jastrow.tmp_c); } - + ctx->jastrow.tmp_c_date = ctx->date; } @@ -5121,10 +5121,10 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) #else const bool gpu_offload = false; #endif - - if (gpu_offload) { + + if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD - rc = qmckl_compute_dtmp_c_cublas_offload(context, + rc = qmckl_compute_dtmp_c_cuBlas(context, ctx->jastrow.cord_num, ctx->electron.num, ctx->nucleus.num, @@ -5829,6 +5829,93 @@ qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( const double* een_rescaled_n, double* const tmp_c ) { + qmckl_exit_code info; + + //Initialisation of cublas + + cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) + { + fprintf(stdout, "CUBLAS initialization failed!\n"); + exit(EXIT_FAILURE); + } + + + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + const double alpha = 1.0; + const double beta = 0.0; + + const int64_t M = elec_num; + const int64_t N = nucl_num*(cord_num + 1); + const int64_t K = elec_num; + + const int64_t LDA = elec_num; + const int64_t LDB = elec_num; + const int64_t LDC = elec_num; + + const int64_t af = elec_num*elec_num; + const int64_t bf = elec_num*nucl_num*(cord_num+1); + const int64_t cf = bf; + + #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) + #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) + { + for (int nw=0; nw < walk_num; ++nw) { + + int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, + &(een_rescaled_e[nw*(cord_num+1)]), \ + LDA, af, \ + &(een_rescaled_n[bf*nw]), \ + LDB, 0, \ + &beta, \ + &(tmp_c[nw*cord_num]), \ + LDC, cf, cord_num); + + //Manage cublas ERROR + if(cublasError != CUBLAS_STATUS_SUCCESS){ + printf("CUBLAS ERROR %d", cublasError); + info = QMCKL_FAILURE; + }else{ + info = QMCKL_SUCCESS; + } + } + } + cublasDestroy(handle); + #pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) + + return info; + } +#+end_src + + + +#+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_tmp_c_cuBlas_batched ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) { + qmckl_exit_code info; //Initialisation of cublas @@ -6708,40 +6795,47 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload ( #+begin_src c :comments org :tangle (eval c) :noweb yes #ifdef HAVE_CUBLAS_OFFLOAD -qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload ( - const qmckl_context context, - const int64_t cord_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t walk_num, - const double* een_rescaled_e_deriv_e, - const double* een_rescaled_n, - double* const dtmp_c ) { +qmckl_exit_code qmckl_compute_dtmp_c_cuBlas (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e_deriv_e, + const double* een_rescaled_n, + double* const dtmp_c ) +{ + + cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) + { + fprintf(stdout, "CUBLAS initialization failed!\n"); + exit(EXIT_FAILURE); + } + + if (context == QMCKL_NULL_CONTEXT) { - return QMCKL_INVALID_CONTEXT; + return QMCKL_INVALID_CONTEXT; } if (cord_num <= 0) { - return QMCKL_INVALID_ARG_2; + return QMCKL_INVALID_ARG_2; } if (elec_num <= 0) { - return QMCKL_INVALID_ARG_3; + return QMCKL_INVALID_ARG_3; } if (nucl_num <= 0) { - return QMCKL_INVALID_ARG_4; + return QMCKL_INVALID_ARG_4; } if (walk_num <= 0) { - return QMCKL_INVALID_ARG_5; + return QMCKL_INVALID_ARG_5; } qmckl_exit_code info = QMCKL_SUCCESS; - const char TransA = 'N'; - const char TransB = 'N'; const double alpha = 1.0; const double beta = 0.0; @@ -6757,19 +6851,48 @@ qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = elec_num*4*nucl_num*(cord_num+1); - // TODO Replace with calls to cuBLAS - for (int64_t nw=0; nw < walk_num; ++nw) { - for (int64_t i=0; i < cord_num; ++i) { - info = qmckl_dgemm(context, TransA, TransB, M, N, K, alpha, \ - &(een_rescaled_e_deriv_e[af*(i+nw*(cord_num+1))]), \ - LDA, \ - &(een_rescaled_n[bf*nw]), \ - LDB, \ - beta, \ - &(dtmp_c[cf*(i+nw*cord_num)]), \ - LDC); +#pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num]) +#pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) + { + for (int64_t nw=0; nw < walk_num; ++nw) { + /* + for (int64_t i=0; i < cord_num; ++i) { + int cublasError = cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, \ + &(een_rescaled_e_deriv_e[af*(i+nw*(cord_num+1))]), \ + LDA, \ + &(een_rescaled_n[bf*nw]), \ + LDB, \ + &beta, \ + &(dtmp_c[cf*(i+nw*cord_num)]), \ + LDC); + ,*/ + //Manage CUBLAS ERRORS + + int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, \ + &(een_rescaled_e_deriv_e[(nw*(cord_num+1))]), \ + LDA, af, \ + &(een_rescaled_n[bf*nw]), \ + LDB, 0, \ + &beta, \ + &(dtmp_c[(nw*cord_num)]), \ + LDC, cf, cord_num); + + + if(cublasError != CUBLAS_STATUS_SUCCESS){ + printf("CUBLAS ERROR %d", cublasError); + info = QMCKL_FAILURE; + return info; + }else{ + info = QMCKL_SUCCESS; + } + + //} } } + cudaDeviceSynchronize(); + cublasDestroy(handle); + +#pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num]) return info; } @@ -6779,7 +6902,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload ( #+RESULTS: #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none #ifdef HAVE_CUBLAS_OFFLOAD - qmckl_exit_code qmckl_compute_dtmp_c_cublas_offload ( + qmckl_exit_code qmckl_compute_dtmp_c_cuBlas ( const qmckl_context context, const int64_t cord_num, const int64_t elec_num, From 07cc64bb3115617ccbb4c815c50bd68172cd96b4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Apr 2022 10:32:38 +0200 Subject: [PATCH 063/100] Changed enable-cublas into with-cublas --- configure.ac | 2 +- org/qmckl_jastrow.org | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 055ca86..0ec94b8 100644 --- a/configure.ac +++ b/configure.ac @@ -304,7 +304,7 @@ AS_IF([test "$enable_gpu" = "openacc"], [ ]) # cuBLAS offloading -AC_ARG_ENABLE(cublas, [AS_HELP_STRING([--enable-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$enableval, HAVE_CUBLAS_OFFLOAD=no) +AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no) AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) case $CC in diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 6191ba5..b3ec515 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -11,7 +11,7 @@ \[ J(\mathbf{r},\mathbf{R}) = J_{\text{eN}}(\mathbf{r},\mathbf{R}) + J_{\text{ee}}(\mathbf{r}) + J_{\text{eeN}}(\mathbf{r},\mathbf{R}) \] - + In the following, we us the notations $r_{ij} = |\mathbf{r}_i - \mathbf{r}_j|$ and $R_{i\alpha} = |\mathbf{r}_i - \mathbf{R}_\alpha|$. @@ -58,7 +58,6 @@ The terms $J_{\text{ee}}^\infty$ and $J_{\text{eN}}^\infty$ are shifts to ensure that $J_{\text{ee}}$ and $J_{\text{eN}}$ have an asymptotic value of zero. - * Headers :noexport: #+begin_src elisp :noexport :results none (org-babel-lob-ingest "../tools/lib.org") From d4f0ccee3b28d30319afc22e74f923ce5e4a4930 Mon Sep 17 00:00:00 2001 From: hoffer Date: Fri, 8 Apr 2022 10:44:48 +0200 Subject: [PATCH 064/100] Add cublas batch Dgemm --- org/qmckl_jastrow.org | 1134 ++++++++++++++++++++--------------------- 1 file changed, 547 insertions(+), 587 deletions(-) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index e13498e..dc58d48 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -109,11 +109,6 @@ int main() { #include -#include -#include - - - #include #include "qmckl.h" @@ -122,6 +117,13 @@ int main() { #include "qmckl_memory_private_func.h" #include "qmckl_jastrow_private_func.h" #include "qmckl_jastrow_private_type.h" + +#ifdef HAVE_CUBLAS_OFFLOAD +#include +#include "cublas_v2.h" +#endif + + #+end_src * Context @@ -1123,7 +1125,7 @@ qmckl_exit_code qmckl_finalize_jastrow(qmckl_context context) { #if defined(HAVE_HPC) && (defined(HAVE_CUBLAS_OFFLOAD) || defined(HAVE_OPENACC_OFFLOAD) || defined(HAVE_OPENMP_OFFLOAD)) ctx->jastrow.gpu_offload = true; // ctx->electron.num > 100; #endif - + qmckl_exit_code rc = QMCKL_SUCCESS; return rc; @@ -1517,7 +1519,7 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const int64_t bord_num, const double* bord_vector, const double rescale_factor_kappa_ee, - double* const asymp_jasb ); + double* const asymp_jasb ); #+end_src @@ -1808,21 +1810,21 @@ qmckl_exit_code qmckl_compute_factor_ee ( int ipar; // can we use a smaller integer? double x, x1, spin_fact, power_ser; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; } - + if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (bord_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } for (int nw = 0; nw < walk_num; ++nw) { factor_ee[nw] = 0.0; // put init array here. @@ -1833,9 +1835,9 @@ qmckl_exit_code qmckl_compute_factor_ee ( x1 = x; power_ser = 0.0; spin_fact = 1.0; - ipar = 0; // index of asymp_jasb + ipar = 0; // index of asymp_jasb - for (int p = 1; p < bord_num; ++p) { + for (int p = 1; p < bord_num; ++p) { x = x * x1; power_ser = power_ser + bord_vector[p + 1] * x; } @@ -1844,7 +1846,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( spin_fact = 0.5; ipar = 1; } - + factor_ee[nw] = factor_ee[nw] + spin_fact * bord_vector[0] * \ x1 / \ (1.0 + bord_vector[1] * \ @@ -1860,7 +1862,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( #+end_src # #+CALL: generate_c_header(table=qmckl_factor_ee_args,rettyp=get_value("CRetType"),fname=get_value("Name")) - + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_compute_factor_ee ( const qmckl_context context, @@ -1871,7 +1873,7 @@ qmckl_exit_code qmckl_compute_factor_ee ( const double* bord_vector, const double* ee_distance_rescaled, const double* asymp_jasb, - double* const factor_ee ); + double* const factor_ee ); #+end_src @@ -2030,7 +2032,6 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) ctx->jastrow.bord_vector, ctx->electron.ee_distance_rescaled, ctx->electron.ee_distance_rescaled_deriv_e, - ctx->jastrow.asymp_jasb, ctx->jastrow.factor_ee_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; @@ -2061,14 +2062,13 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) | ~bord_vector~ | ~double[bord_num+1]~ | in | List of coefficients | | ~ee_distance_rescaled~ | ~double[walk_num][elec_num][elec_num]~ | in | Electron-electron distances | | ~ee_distance_rescaled_deriv_e~ | ~double[walk_num][4][elec_num][elec_num]~ | in | Electron-electron distances | - | ~asymp_jasb~ | ~double[2]~ | in | Electron-electron distances | | ~factor_ee_deriv_e~ | ~double[walk_num][4][elec_num]~ | out | Electron-electron distances | #+begin_src f90 :comments org :tangle (eval f) :noweb yes -integer function qmckl_compute_factor_ee_deriv_e_f( & +integer function qmckl_compute_factor_ee_deriv_e_doc_f( & context, walk_num, elec_num, up_num, bord_num, & bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, & - asymp_jasb, factor_ee_deriv_e) & + factor_ee_deriv_e) & result(info) use qmckl implicit none @@ -2077,10 +2077,9 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & double precision , intent(in) :: bord_vector(bord_num + 1) double precision , intent(in) :: ee_distance_rescaled(elec_num, elec_num,walk_num) double precision , intent(in) :: ee_distance_rescaled_deriv_e(4,elec_num, elec_num,walk_num) !TODO - double precision , intent(in) :: asymp_jasb(2) double precision , intent(out) :: factor_ee_deriv_e(elec_num,4,walk_num) - integer*8 :: i, j, p, ipar, nw, ii + integer*8 :: i, j, p, nw, ii double precision :: x, spin_fact, y double precision :: den, invden, invden2, invden3, xinv double precision :: lap1, lap2, lap3, third @@ -2124,7 +2123,6 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & invden2 = invden * invden invden3 = invden2 * invden xinv = 1.0d0 / (x + 1.0d-18) - ipar = 1 dx(1) = ee_distance_rescaled_deriv_e(1, i, j, nw) dx(2) = ee_distance_rescaled_deriv_e(2, i, j, nw) @@ -2166,7 +2164,120 @@ integer function qmckl_compute_factor_ee_deriv_e_f( & end do end do -end function qmckl_compute_factor_ee_deriv_e_f +end function qmckl_compute_factor_ee_deriv_e_doc_f + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes +qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + double* const factor_ee_deriv_e ) { + + int64_t ii; + double pow_ser_g[3]; + double dx[4]; + double x, spin_fact, y; + double den, invden, invden2, invden3, xinv; + double lap1, lap2, lap3, third; + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (walk_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (bord_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + + for (int nw = 0; nw < walk_num; ++nw) { + for (int ii = 0; ii < 4; ++ii) { + for (int j = 0; j < elec_num; ++j) { + factor_ee_deriv_e[j + ii * elec_num + nw * elec_num * 4] = 0.0; + } + } + } + + third = 1.0 / 3.0; + + for (int nw = 0; nw < walk_num; ++nw) { + for (int i = 0; i < elec_num; ++i) { + for (int j = 0; j < elec_num; ++j) { + x = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num]; + if (fabs(x) < 1.0e-18) continue; + for (int ii = 0; ii < 3; ++ii){ + pow_ser_g[ii] = 0.0; + } + spin_fact = 1.0; + den = 1.0 + bord_vector[1] * x; + invden = 1.0 / den; + invden2 = invden * invden; + invden3 = invden2 * invden; + xinv = 1.0 / (x + 1.0e-18); + + dx[0] = ee_distance_rescaled_deriv_e[0 \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + dx[1] = ee_distance_rescaled_deriv_e[1 \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + dx[2] = ee_distance_rescaled_deriv_e[2 \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + dx[3] = ee_distance_rescaled_deriv_e[3 \ + + j * 4 + i * 4 * elec_num \ + + nw * 4 * elec_num * elec_num]; + + if((i <= (up_num-1) && j <= (up_num-1) ) || (i > (up_num-1) && j > (up_num-1))) { + spin_fact = 0.5; + } + + lap1 = 0.0; + lap2 = 0.0; + lap3 = 0.0; + for (int ii = 0; ii < 3; ++ii) { + x = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num]; + if (fabs(x) < 1.0e-18) continue; + for (int p = 2; p < bord_num+1; ++p) { + y = p * bord_vector[(p-1) + 1] * x; + pow_ser_g[ii] = pow_ser_g[ii] + y * dx[ii]; + lap1 = lap1 + (p - 1) * y * xinv * dx[ii] * dx[ii]; + lap2 = lap2 + y; + x = x * ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num]; + } + + lap3 = lap3 - 2.0 * bord_vector[1] * dx[ii] * dx[ii]; + + factor_ee_deriv_e[i + ii * elec_num + nw * elec_num * 4 ] += \ + + spin_fact * bord_vector[0] * dx[ii] * invden2 \ + + pow_ser_g[ii] ; + } + + ii = 3; + lap2 = lap2 * dx[ii] * third; + lap3 = lap3 + den * dx[ii]; + lap3 = lap3 * (spin_fact * bord_vector[0] * invden3); + factor_ee_deriv_e[i + ii*elec_num + nw * elec_num * 4] += lap1 + lap2 + lap3; + + } + } + } + + return QMCKL_SUCCESS; +} #+end_src # #+CALL: generate_c_header(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -2182,17 +2293,16 @@ end function qmckl_compute_factor_ee_deriv_e_f const double* bord_vector, const double* ee_distance_rescaled, const double* ee_distance_rescaled_deriv_e, - const double* asymp_jasb, - double* const factor_ee_deriv_e ); + double* const factor_ee_deriv_e ); #+end_src - #+CALL: generate_c_interface(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) + #+CALL: generate_c_interface(table=qmckl_factor_ee_deriv_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_factor_ee_deriv_e_doc") #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none - integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e & - (context, & +integer(c_int32_t) function qmckl_compute_factor_ee_deriv_e_doc & + (context, & walk_num, & elec_num, & up_num, & @@ -2200,7 +2310,6 @@ end function qmckl_compute_factor_ee_deriv_e_f bord_vector, & ee_distance_rescaled, & ee_distance_rescaled_deriv_e, & - asymp_jasb, & factor_ee_deriv_e) & bind(C) result(info) @@ -2215,12 +2324,11 @@ end function qmckl_compute_factor_ee_deriv_e_f real (c_double ) , intent(in) :: bord_vector(bord_num+1) real (c_double ) , intent(in) :: ee_distance_rescaled(elec_num,elec_num,walk_num) real (c_double ) , intent(in) :: ee_distance_rescaled_deriv_e(elec_num,elec_num,4,walk_num) - real (c_double ) , intent(in) :: asymp_jasb(2) real (c_double ) , intent(out) :: factor_ee_deriv_e(elec_num,4,walk_num) - integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_f - info = qmckl_compute_factor_ee_deriv_e_f & - (context, & + integer(c_int32_t), external :: qmckl_compute_factor_ee_deriv_e_doc_f + info = qmckl_compute_factor_ee_deriv_e_doc_f & + (context, & walk_num, & elec_num, & up_num, & @@ -2228,11 +2336,60 @@ end function qmckl_compute_factor_ee_deriv_e_f bord_vector, & ee_distance_rescaled, & ee_distance_rescaled_deriv_e, & - asymp_jasb, & factor_ee_deriv_e) - end function qmckl_compute_factor_ee_deriv_e + end function qmckl_compute_factor_ee_deriv_e_doc #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_factor_ee_deriv_e_hpc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + double* const factor_ee_deriv_e ); + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org + qmckl_exit_code qmckl_compute_factor_ee_deriv_e_doc ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + double* const factor_ee_deriv_e ); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes + qmckl_exit_code qmckl_compute_factor_ee_deriv_e ( + const qmckl_context context, + const int64_t walk_num, + const int64_t elec_num, + const int64_t up_num, + const int64_t bord_num, + const double* bord_vector, + const double* ee_distance_rescaled, + const double* ee_distance_rescaled_deriv_e, + double* const factor_ee_deriv_e ) { + + #ifdef HAVE_HPC + return qmckl_compute_factor_ee_deriv_e_hpc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, factor_ee_deriv_e ); + #else + return qmckl_compute_factor_ee_deriv_e_doc(context, walk_num, elec_num, up_num, bord_num, bord_vector, ee_distance_rescaled, ee_distance_rescaled_deriv_e, factor_ee_deriv_e ); + #endif +} + #+end_src + + + + *** Test #+begin_src python :results output :exports none :noweb yes @@ -2351,7 +2508,6 @@ assert(fabs(factor_ee_deriv_e[0][0][0]-0.16364894652107934) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][1][0]+0.6927548119830084 ) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][2][0]-0.073267755223968 ) < 1.e-12); assert(fabs(factor_ee_deriv_e[0][3][0]-1.5111672803213185 ) < 1.e-12); - #+end_src ** Electron-nucleus component \(f_{en}\) @@ -2457,7 +2613,7 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2556,7 +2712,7 @@ integer function qmckl_compute_factor_en_f( & end function qmckl_compute_factor_en_f #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_factor_en ( @@ -2625,7 +2781,7 @@ qmckl_exit_code qmckl_compute_factor_en ( x1 = x; power_ser = 0.0; - for (int p = 2; p < aord_num+1; ++p) { + for (int p = 2; p < aord_num+1; ++p) { x = x * x1; power_ser = power_ser + aord_vector[(p+1)-1 + (type_nucl_vector[a]-1) * aord_num] * x; } @@ -2656,7 +2812,7 @@ qmckl_exit_code qmckl_compute_factor_en ( const int64_t aord_num, const double* aord_vector, const double* en_distance_rescaled, - double* const factor_en ); + double* const factor_en ); #+end_src @@ -2950,7 +3106,7 @@ end function qmckl_compute_factor_en_deriv_e_f const double* aord_vector, const double* en_distance_rescaled, const double* en_distance_rescaled_deriv_e, - double* const factor_en_deriv_e ); + double* const factor_en_deriv_e ); #+end_src @@ -3343,7 +3499,7 @@ end function qmckl_compute_een_rescaled_e_doc_f const int64_t cord_num, const double rescale_factor_kappa_ee, const double* ee_distance, - double* const een_rescaled_e ); + double* const een_rescaled_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_een_rescaled_e_args,rettyp=get_value("CRetType"),fname="qmckl_compute_een_rescaled_e_doc") @@ -3382,13 +3538,13 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double rescale_factor_kappa_ee, const double* ee_distance, double* const een_rescaled_e ) { - - double *een_rescaled_e_ij; + + double *een_rescaled_e_ij; double x; const int64_t elec_pairs = (elec_num * (elec_num - 1)) / 2; const int64_t len_een_ij = elec_pairs * (cord_num + 1); - int64_t k; - + int64_t k; + // number of element for the een_rescaled_e_ij[N_e*(N_e-1)/2][cord+1] // probably in C is better [cord+1, Ne*(Ne-1)/2] //elec_pairs = (elec_num * (elec_num - 1)) / 2; @@ -3397,7 +3553,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -3412,8 +3568,8 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } // Prepare table of exponentiated distances raised to appropriate power - // init - + // init + for (int kk = 0; kk < walk_num*(cord_num+1)*elec_num*elec_num; ++kk) { een_rescaled_e[kk]= 0.0; } @@ -3431,14 +3587,14 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( */ for (int nw = 0; nw < walk_num; ++nw) { - + for (int kk = 0; kk < len_een_ij; ++kk) { // this array initialized at 0 except een_rescaled_e_ij(:, 1) = 1.0d0 // and the arrangement of indices is [cord_num+1, ne*(ne-1)/2] een_rescaled_e_ij[kk]= ( kk < (elec_pairs) ? 1.0 : 0.0 ); } - k = 0; + k = 0; for (int i = 0; i < elec_num; ++i) { for (int j = 0; j < i; ++j) { // een_rescaled_e_ij(k, 2) = dexp(-rescale_factor_kappa_ee * ee_distance(i, j, nw)); @@ -3456,7 +3612,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e_ij[k + elec_pairs]; } } - + // prepare the actual een table for (int i = 0; i < elec_num; ++i){ @@ -3464,7 +3620,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( een_rescaled_e[j + i*elec_num + 0 + nw*(cord_num+1)*elec_num*elec_num] = 1.0; } } - + // Up to here it should work. for ( int l = 1; l < (cord_num+1); ++l) { k = 0; @@ -3487,7 +3643,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( } free(een_rescaled_e_ij); - + return QMCKL_SUCCESS; } #+end_src @@ -3526,7 +3682,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc ( const double* ee_distance, double* const een_rescaled_e ); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes qmckl_exit_code qmckl_compute_een_rescaled_e ( const qmckl_context context, @@ -3854,7 +4010,7 @@ end function qmckl_compute_factor_een_rescaled_e_deriv_e_f const double* coord_new, const double* ee_distance, const double* een_rescaled_e, - double* const een_rescaled_e_deriv_e ); + double* const een_rescaled_e_deriv_e ); #+end_src @@ -4213,7 +4369,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_2; @@ -4274,7 +4430,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_n ( const int64_t cord_num, const double rescale_factor_kappa_en, const double* en_distance, - double* const een_rescaled_n ); + double* const een_rescaled_n ); #+end_src *** Test @@ -4583,7 +4739,7 @@ end function qmckl_compute_factor_een_rescaled_n_deriv_e_f const double* coord, const double* en_distance, const double* een_rescaled_n, - double* const een_rescaled_n_deriv_e ); + double* const een_rescaled_n_deriv_e ); #+end_src #+CALL: generate_c_interface(table=qmckl_compute_factor_een_rescaled_n_deriv_e_args,rettyp=get_value("CRetType"),fname=get_value("Name")) @@ -5035,7 +5191,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD - rc = qmckl_compute_tmp_c_cuBlas(context, + rc = qmckl_compute_tmp_c_cublas_offload(context, ctx->jastrow.cord_num, ctx->electron.num, ctx->nucleus.num, @@ -5124,7 +5280,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) if (gpu_offload) { #ifdef HAVE_CUBLAS_OFFLOAD - rc = qmckl_compute_dtmp_c_cuBlas(context, + rc = qmckl_compute_dtmp_c_cublas_offload(context, ctx->jastrow.cord_num, ctx->electron.num, ctx->nucleus.num, @@ -5238,10 +5394,10 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, int64_t* const dim_cord_vect){ - + int lmax; - + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5251,7 +5407,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } *dim_cord_vect = 0; - + for (int p=2; p <= cord_num; ++p){ for (int k=p-1; k >= 0; --k) { if (k != 0) { @@ -5265,7 +5421,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( } } } - + return QMCKL_SUCCESS; } #+end_src @@ -5276,7 +5432,7 @@ qmckl_exit_code qmckl_compute_dim_cord_vect ( qmckl_exit_code qmckl_compute_dim_cord_vect ( const qmckl_context context, const int64_t cord_num, - int64_t* const dim_cord_vect ); + int64_t* const dim_cord_vect ); #+end_src @@ -5541,15 +5697,15 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( int kk, lmax, m; - if (context == QMCKL_NULL_CONTEXT) { + if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } - if (cord_num <= 0) { + if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; } - if (dim_cord_vect <= 0) { + if (dim_cord_vect <= 0) { return QMCKL_INVALID_ARG_3; } @@ -5586,7 +5742,7 @@ qmckl_exit_code qmckl_compute_lkpm_combined_index ( const qmckl_context context, const int64_t cord_num, const int64_t dim_cord_vect, - int64_t* const lkpm_combined_index ); + int64_t* const lkpm_combined_index ); #+end_src @@ -5627,7 +5783,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, #endif } #+end_src - + # #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none @@ -5639,7 +5795,7 @@ qmckl_exit_code qmckl_compute_tmp_c (const qmckl_context context, const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+begin_src f90 :comments org :tangle (eval f) :noweb yes @@ -5719,11 +5875,11 @@ qmckl_exit_code qmckl_compute_tmp_c_doc ( const int64_t walk_num, const double* een_rescaled_e, const double* een_rescaled_n, - double* const tmp_c ); + double* const tmp_c ); #+end_src #+CALL: generate_c_interface(table=qmckl_factor_tmp_c_args,rettyp=get_value("FRetType"),fname="qmckl_compute_tmp_c_doc") - + #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_tmp_c_doc & @@ -5768,19 +5924,19 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( if (cord_num <= 0) { return QMCKL_INVALID_ARG_2; - } + } if (elec_num <= 0) { return QMCKL_INVALID_ARG_3; - } + } if (nucl_num <= 0) { return QMCKL_INVALID_ARG_4; - } + } if (walk_num <= 0) { return QMCKL_INVALID_ARG_5; - } + } qmckl_exit_code info = QMCKL_SUCCESS; @@ -5818,18 +5974,259 @@ qmckl_exit_code qmckl_compute_tmp_c_hpc ( #+end_src -#+begin_src c :comments org :tangle (eval c) :noweb yes -qmckl_exit_code qmckl_compute_tmp_c_cuBlas ( - const qmckl_context context, - const int64_t cord_num, - const int64_t elec_num, - const int64_t nucl_num, - const int64_t walk_num, - const double* een_rescaled_e, - const double* een_rescaled_n, - double* const tmp_c ) { - qmckl_exit_code info; + #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c") + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_doc") + + #+RESULTS: + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_doc ( + const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +# #+CALL: generate_c_header(table=qmckl_factor_tmp_c_args,rettyp=get_value("CRetType"),fname="qmckl_compute_tmp_c_hpc") + + #+RESULTS: + + #+begin_src c :tangle (eval h_private_func) :comments org +qmckl_exit_code qmckl_compute_tmp_c_hpc (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ); + #+end_src + +**** OpenACC offload :noexport: + + #+begin_src c :comments org :tangle (eval c) :noweb yes +#ifdef HAVE_OPENACC_OFFLOAD +qmckl_exit_code +qmckl_compute_tmp_c_acc_offload (const qmckl_context context, + const int64_t cord_num, + const int64_t elec_num, + const int64_t nucl_num, + const int64_t walk_num, + const double* een_rescaled_e, + const double* een_rescaled_n, + double* const tmp_c ) +{ + + if (context == QMCKL_NULL_CONTEXT) { + return QMCKL_INVALID_CONTEXT; + } + + if (cord_num <= 0) { + return QMCKL_INVALID_ARG_2; + } + + if (elec_num <= 0) { + return QMCKL_INVALID_ARG_3; + } + + if (nucl_num <= 0) { + return QMCKL_INVALID_ARG_4; + } + + // Compute array access strides: + // For tmp_c... + const int64_t stride_k_c = elec_num; + const int64_t stride_j_c = stride_k_c * nucl_num; + const int64_t stride_i_c = stride_j_c * (cord_num+1); + const int64_t stride_nw_c = stride_i_c * cord_num; + // For een_rescaled_e... + const int64_t stride_m_e = elec_num; + const int64_t stride_i_e = stride_m_e * elec_num; + const int64_t stride_nw_e = stride_i_e * (cord_num+1); + // For een_rescaled_n... + const int64_t stride_k_n = elec_num; + const int64_t stride_j_n = stride_k_n * nucl_num; + const int64_t stride_nw_n = stride_j_n * (cord_num+1); + + const int64_t size_tmp_c = elec_num*nucl_num*(cord_num+1)*cord_num*walk_num; + const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; + const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; + + #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) + { +#pragma acc loop independent gang worker vector collapse(5) + for (int nw=0; nw < walk_num; ++nw) { + for (int i=0; i Date: Fri, 8 Apr 2022 11:11:15 +0200 Subject: [PATCH 065/100] Configure cuBLAS with --enable-gpu and clean code --- configure.ac | 5 +- org/qmckl_jastrow.org | 127 ++++++++++++------------------------------ 2 files changed, 40 insertions(+), 92 deletions(-) diff --git a/configure.ac b/configure.ac index 0ec94b8..17350c7 100644 --- a/configure.ac +++ b/configure.ac @@ -304,9 +304,10 @@ AS_IF([test "$enable_gpu" = "openacc"], [ ]) # cuBLAS offloading -AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no) -AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ +HAVE_OPENACC_OFFLOAD="no" +AS_IF([test "$enable_gpu" = "cublas"], [ AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) + HAVE_CUBLAS_OFFLOAD="yes" case $CC in *gcc*) diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index adf66d1..fb3cf3e 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -118,7 +118,6 @@ int main() { #include "qmckl_jastrow_private_type.h" #ifdef HAVE_CUBLAS_OFFLOAD -#include #include "cublas_v2.h" #endif @@ -6229,30 +6228,6 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context, { qmckl_exit_code info; - //Initialisation of cublas - - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) - { - fprintf(stdout, "CUBLAS initialization failed!\n"); - exit(EXIT_FAILURE); - } - - - - qmckl_exit_code info; - - //Initialisation of cublas - - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) - { - fprintf(stdout, "CUBLAS initialization failed!\n"); - exit(EXIT_FAILURE); - } - - - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -6269,6 +6244,14 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context, return QMCKL_INVALID_ARG_4; } + //cuBLAS initialization + cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) + { + fprintf(stdout, "CUBLAS initialization failed!\n"); + exit(EXIT_FAILURE); + } + const double alpha = 1.0; const double beta = 0.0; @@ -6284,45 +6267,24 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context, const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = bf; - #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) { for (int nw=0; nw < walk_num; ++nw) { - int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, - &(een_rescaled_e[nw*(cord_num+1)]), \ - LDA, af, \ - &(een_rescaled_n[bf*nw]), \ - LDB, 0, \ - &beta, \ - &(tmp_c[nw*cord_num]), \ + int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, + &(een_rescaled_e[nw*(cord_num+1)]), + LDA, af, + &(een_rescaled_n[bf*nw]), + LDB, 0, + &beta, + &(tmp_c[nw*cord_num]), LDC, cf, cord_num); - - - //Manage cublas ERROR - if(cublasError != CUBLAS_STATUS_SUCCESS){ - printf("CUBLAS ERROR %d", cublasError); - info = QMCKL_FAILURE; - - return info; - }else{ - info = QMCKL_SUCCESS; - } - - - - } } - cudaDeviceSynchronize(); - cublasDestroy(handle); - - #pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) - - + cublasDestroy(handle); return info; } #endif @@ -6801,15 +6763,6 @@ qmckl_compute_dtmp_c_cublas_offload ( const double* een_rescaled_n, double* const dtmp_c ) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) - { - fprintf(stdout, "CUBLAS initialization failed!\n"); - exit(EXIT_FAILURE); - } - - - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -6832,6 +6785,14 @@ qmckl_compute_dtmp_c_cublas_offload ( qmckl_exit_code info = QMCKL_SUCCESS; + //cuBLAS initialization + cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) + { + fprintf(stdout, "CUBLAS initialization failed!\n"); + exit(EXIT_FAILURE); + } + const double alpha = 1.0; const double beta = 0.0; @@ -6847,38 +6808,24 @@ qmckl_compute_dtmp_c_cublas_offload ( const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t cf = elec_num*4*nucl_num*(cord_num+1); -#pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num]) -#pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) + #pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num]) + #pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) { - for (int64_t nw=0; nw < walk_num; ++nw) { - //Manage CUBLAS ERRORS - - int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, \ - &(een_rescaled_e_deriv_e[(nw*(cord_num+1))]), \ - LDA, af, \ - &(een_rescaled_n[bf*nw]), \ - LDB, 0, \ - &beta, \ - &(dtmp_c[(nw*cord_num)]), \ + for (int64_t nw=0; nw < walk_num; ++nw) { + int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, + &(een_rescaled_e_deriv_e[(nw*(cord_num+1))]), + LDA, af, + &(een_rescaled_n[bf*nw]), LDB, 0, + &beta, + &(dtmp_c[(nw*cord_num)]), LDC, cf, cord_num); - - if(cublasError != CUBLAS_STATUS_SUCCESS){ - printf("CUBLAS ERROR %d", cublasError); - info = QMCKL_FAILURE; - return info; - }else{ - info = QMCKL_SUCCESS; - } - - //} - } } - cudaDeviceSynchronize(); + } + + #pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num]) + cublasDestroy(handle); - -#pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num]) - return info; } #endif From 69009900f7d19c18a5f740550f2e3dda4a0027e5 Mon Sep 17 00:00:00 2001 From: Aurelien Delval Date: Fri, 8 Apr 2022 11:37:37 +0200 Subject: [PATCH 066/100] Revert cuBLAS to old config --- configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 17350c7..e2d1c22 100644 --- a/configure.ac +++ b/configure.ac @@ -304,10 +304,10 @@ AS_IF([test "$enable_gpu" = "openacc"], [ ]) # cuBLAS offloading -HAVE_OPENACC_OFFLOAD="no" -AS_IF([test "$enable_gpu" = "cublas"], [ +AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no) +AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) - HAVE_CUBLAS_OFFLOAD="yes" + HAVE_OPENACC_OFFLOAD="yes" case $CC in *gcc*) From b60fc166012d1e7fd20eb47c231151ac34255ece Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 20 Apr 2022 15:55:59 +0200 Subject: [PATCH 067/100] Add Fortran interfaces in MOs --- org/qmckl_mo.org | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index bf45ff0..1974874 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -256,6 +256,35 @@ bool qmckl_mo_basis_provided(const qmckl_context context) { #+end_src + +*** Fortran interfaces + + #+begin_src f90 :tangle (eval fh_func) :comments org +interface + integer(c_int32_t) function qmckl_get_mo_basis_mo_num (context, & + mo_num) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(out) :: mo_num + end function qmckl_get_mo_basis_mo_num +end interface + +interface + integer(c_int32_t) function qmckl_get_mo_basis_coefficient(context, & + coefficient, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: coefficient(*) + integer (c_int64_t) , intent(int), value :: size_max + end function qmckl_get_mo_basis_coefficient +end interface + + #+end_src + ** Initialization functions To set the basis set, all the following functions need to be From 5a833cf3f09e6e1fd102517a36db029b47b6e120 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 5 May 2022 16:25:32 +0200 Subject: [PATCH 068/100] Restored dgemm for AO to MO in doc version --- org/qmckl_mo.org | 52 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 88fe69c..5d49030 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -666,25 +666,41 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & integer*8 :: i,j,k double precision :: c1, c2, c3, c4, c5 - do j=1,point_num - mo_vgl(:,:,j) = 0.d0 - do k=1,ao_num - if (ao_vgl(k,1,j) /= 0.d0) then - c1 = ao_vgl(k,1,j) - c2 = ao_vgl(k,2,j) - c3 = ao_vgl(k,3,j) - c4 = ao_vgl(k,4,j) - c5 = ao_vgl(k,5,j) - do i=1,mo_num - mo_vgl(i,1,j) = mo_vgl(i,1,j) + coef_normalized_t(i,k) * c1 - mo_vgl(i,2,j) = mo_vgl(i,2,j) + coef_normalized_t(i,k) * c2 - mo_vgl(i,3,j) = mo_vgl(i,3,j) + coef_normalized_t(i,k) * c3 - mo_vgl(i,4,j) = mo_vgl(i,4,j) + coef_normalized_t(i,k) * c4 - mo_vgl(i,5,j) = mo_vgl(i,5,j) + coef_normalized_t(i,k) * c5 - end do - end if + integer*8 :: LDA, LDB, LDC + + info = QMCKL_SUCCESS + if (.False.) then ! fast algorithm + do j=1,point_num + mo_vgl(:,:,j) = 0.d0 + do k=1,ao_num + if (ao_vgl(k,1,j) /= 0.d0) then + c1 = ao_vgl(k,1,j) + c2 = ao_vgl(k,2,j) + c3 = ao_vgl(k,3,j) + c4 = ao_vgl(k,4,j) + c5 = ao_vgl(k,5,j) + do i=1,mo_num + mo_vgl(i,1,j) = mo_vgl(i,1,j) + coef_normalized_t(i,k) * c1 + mo_vgl(i,2,j) = mo_vgl(i,2,j) + coef_normalized_t(i,k) * c2 + mo_vgl(i,3,j) = mo_vgl(i,3,j) + coef_normalized_t(i,k) * c3 + mo_vgl(i,4,j) = mo_vgl(i,4,j) + coef_normalized_t(i,k) * c4 + mo_vgl(i,5,j) = mo_vgl(i,5,j) + coef_normalized_t(i,k) * c5 + end do + end if + end do end do - end do + + else ! dgemm + + LDA = size(coef_normalized_t,1) + LDB = size(ao_vgl,1) + LDC = size(mo_vgl,1) + + info = qmckl_dgemm(context,'N', 'N', mo_num, point_num*5_8, ao_num*1_8, 1.d0, & + coef_normalized_t, LDA, ao_vgl, LDB, & + 0.d0, mo_vgl, LDC) + + end if end function qmckl_compute_mo_basis_mo_vgl_doc_f #+end_src From 1889fed1006921c78fc11c8de16e9cb780f41d84 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 5 May 2022 20:49:44 +0200 Subject: [PATCH 069/100] Fixed mo bug --- org/qmckl_mo.org | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 5d49030..66023c0 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -669,7 +669,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & integer*8 :: LDA, LDB, LDC info = QMCKL_SUCCESS - if (.False.) then ! fast algorithm + if (.True.) then ! fast algorithm do j=1,point_num mo_vgl(:,:,j) = 0.d0 do k=1,ao_num @@ -810,11 +810,12 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, #endif for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { double* restrict const vgl1 = &(mo_vgl[ipoint*5*mo_num]); - const double* restrict avgl1 = &(ao_vgl[ipoint*5*ao_num]); double* restrict const vgl2 = vgl1 + mo_num; double* restrict const vgl3 = vgl1 + (mo_num << 1); double* restrict const vgl4 = vgl1 + (mo_num << 1) + mo_num; double* restrict const vgl5 = vgl1 + (mo_num << 2); + + const double* restrict avgl1 = &(ao_vgl[ipoint*5*ao_num]); const double* restrict avgl2 = avgl1 + ao_num; const double* restrict avgl3 = avgl1 + (ao_num << 1); const double* restrict avgl4 = avgl1 + (ao_num << 1) + ao_num; @@ -849,6 +850,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, } int64_t n; + for (n=0 ; n < nidx-4 ; n+=4) { int64_t k = idx[n]; const double* restrict ck1 = coef_normalized_t + idx[n ]*mo_num; @@ -893,8 +895,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, } } - int64_t n0 = nidx-4; - n0 = n0 < 0 ? 0 : n0; + const int64_t n0 = n < 0 ? 0 : n; for (int64_t n=n0 ; n < nidx ; n+=1) { const double* restrict ck = coef_normalized_t + idx[n]*mo_num; const double a1 = av1[n]; From cc81057ff2765fa625b3a5c1e4d590226d46f1a5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 5 May 2022 21:00:50 +0200 Subject: [PATCH 070/100] Renaming --- org/qmckl_mo.org | 55 +++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 88fe69c..bb6037d 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -639,7 +639,7 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context) | ~ao_num~ | ~int64_t~ | in | Number of AOs | | ~mo_num~ | ~int64_t~ | in | Number of MOs | | ~point_num~ | ~int64_t~ | in | Number of points | - | ~coef_normalized_t~ | ~double[mo_num][ao_num]~ | in | Transpose of the AO to MO transformation matrix | + | ~coefficient_t~ | ~double[mo_num][ao_num]~ | in | Transpose of the AO to MO transformation matrix | | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | in | Value, gradients and Laplacian of the AOs | | ~mo_vgl~ | ~double[point_num][5][mo_num]~ | out | Value, gradients and Laplacian of the MOs | @@ -653,7 +653,7 @@ qmckl_exit_code qmckl_provide_mo_vgl(qmckl_context context) #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & ao_num, mo_num, point_num, & - coef_normalized_t, ao_vgl, mo_vgl) & + coefficient_t, ao_vgl, mo_vgl) & result(info) use qmckl implicit none @@ -661,7 +661,7 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & integer*8 , intent(in) :: ao_num, mo_num integer*8 , intent(in) :: point_num double precision , intent(in) :: ao_vgl(ao_num,5,point_num) - double precision , intent(in) :: coef_normalized_t(mo_num,ao_num) + double precision , intent(in) :: coefficient_t(mo_num,ao_num) double precision , intent(out) :: mo_vgl(mo_num,5,point_num) integer*8 :: i,j,k double precision :: c1, c2, c3, c4, c5 @@ -676,15 +676,22 @@ integer function qmckl_compute_mo_basis_mo_vgl_doc_f(context, & c4 = ao_vgl(k,4,j) c5 = ao_vgl(k,5,j) do i=1,mo_num - mo_vgl(i,1,j) = mo_vgl(i,1,j) + coef_normalized_t(i,k) * c1 - mo_vgl(i,2,j) = mo_vgl(i,2,j) + coef_normalized_t(i,k) * c2 - mo_vgl(i,3,j) = mo_vgl(i,3,j) + coef_normalized_t(i,k) * c3 - mo_vgl(i,4,j) = mo_vgl(i,4,j) + coef_normalized_t(i,k) * c4 - mo_vgl(i,5,j) = mo_vgl(i,5,j) + coef_normalized_t(i,k) * c5 + mo_vgl(i,1,j) = mo_vgl(i,1,j) + coefficient_t(i,k) * c1 + mo_vgl(i,2,j) = mo_vgl(i,2,j) + coefficient_t(i,k) * c2 + mo_vgl(i,3,j) = mo_vgl(i,3,j) + coefficient_t(i,k) * c3 + mo_vgl(i,4,j) = mo_vgl(i,4,j) + coefficient_t(i,k) * c4 + mo_vgl(i,5,j) = mo_vgl(i,5,j) + coefficient_t(i,k) * c5 end do end if end do end do + info = QMCKL_SUCCESS + +! info = qmckl_dgemm(context,'N', 'N', mo_num, point_num, ao_num, 1.d0, & +! coefficient_t, int(size(coefficient_t,1),8), & +! ao_vgl, int(size(ao_vgl,1),8), 0.d0, & +! mo_vgl, int(size(mo_vgl,1),8)) + end function qmckl_compute_mo_basis_mo_vgl_doc_f #+end_src @@ -698,7 +705,7 @@ end function qmckl_compute_mo_basis_mo_vgl_doc_f const int64_t ao_num, const int64_t mo_num, const int64_t point_num, - const double* coef_normalized_t, + const double* coefficient_t, const double* ao_vgl, double* const mo_vgl ); #+end_src @@ -712,7 +719,7 @@ end function qmckl_compute_mo_basis_mo_vgl_doc_f const int64_t ao_num, const int64_t mo_num, const int64_t point_num, - const double* coef_normalized_t, + const double* coefficient_t, const double* ao_vgl, double* const mo_vgl ); #+end_src @@ -722,7 +729,7 @@ end function qmckl_compute_mo_basis_mo_vgl_doc_f #+RESULTS: #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_mo_basis_mo_vgl_doc & - (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl) & + (context, ao_num, mo_num, point_num, coefficient_t, ao_vgl, mo_vgl) & bind(C) result(info) use, intrinsic :: iso_c_binding @@ -732,13 +739,13 @@ end function qmckl_compute_mo_basis_mo_vgl_doc_f integer (c_int64_t) , intent(in) , value :: ao_num integer (c_int64_t) , intent(in) , value :: mo_num integer (c_int64_t) , intent(in) , value :: point_num - real (c_double ) , intent(in) :: coef_normalized_t(ao_num,mo_num) + real (c_double ) , intent(in) :: coefficient_t(ao_num,mo_num) real (c_double ) , intent(in) :: ao_vgl(ao_num,5,point_num) real (c_double ) , intent(out) :: mo_vgl(mo_num,5,point_num) integer(c_int32_t), external :: qmckl_compute_mo_basis_mo_vgl_doc_f info = qmckl_compute_mo_basis_mo_vgl_doc_f & - (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl) + (context, ao_num, mo_num, point_num, coefficient_t, ao_vgl, mo_vgl) end function qmckl_compute_mo_basis_mo_vgl_doc #+end_src @@ -749,14 +756,14 @@ qmckl_compute_mo_basis_mo_vgl (const qmckl_context context, const int64_t ao_num, const int64_t mo_num, const int64_t point_num, - const double* coef_normalized_t, + const double* coefficient_t, const double* ao_vgl, double* const mo_vgl ) { #ifdef HAVE_HPC - return qmckl_compute_mo_basis_mo_vgl_hpc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl); + return qmckl_compute_mo_basis_mo_vgl_hpc (context, ao_num, mo_num, point_num, coefficient_t, ao_vgl, mo_vgl); #else - return qmckl_compute_mo_basis_mo_vgl_doc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_vgl, mo_vgl); + return qmckl_compute_mo_basis_mo_vgl_doc (context, ao_num, mo_num, point_num, coefficient_t, ao_vgl, mo_vgl); #endif } #+end_src @@ -772,7 +779,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, const int64_t ao_num, const int64_t mo_num, const int64_t point_num, - const double* coef_normalized_t, + const double* coefficient_t, const double* ao_vgl, double* const mo_vgl ); #endif @@ -785,7 +792,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, const int64_t ao_num, const int64_t mo_num, const int64_t point_num, - const double* restrict coef_normalized_t, + const double* restrict coefficient_t, const double* restrict ao_vgl, double* restrict const mo_vgl ) { @@ -820,7 +827,7 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, double av4[ao_num]; double av5[ao_num]; for (int64_t k=0 ; k Date: Tue, 5 Apr 2022 11:44:17 +0200 Subject: [PATCH 071/100] Fixed cppcheck --- .github/workflows/test-build.yml | 1 - org/qmckl_ao.org | 3 +- org/qmckl_blas.org | 6 +- org/qmckl_jastrow.org | 300 +++++++++++------------- org/qmckl_mo.org | 14 +- org/qmckl_sherman_morrison_woodbury.org | 2 +- 6 files changed, 154 insertions(+), 172 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index c7af73f..b4e5b6c 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -4,7 +4,6 @@ on: push: branches: [ master ] pull_request: - branches: [ master ] jobs: x86_ubuntu: diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 54c5319..2111f8e 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) { } } - rc = QMCKL_SUCCESS; #ifdef HAVE_HPC rc = qmckl_finalize_basis_hpc(context); +#else + rc = QMCKL_SUCCESS; #endif return rc; diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 9cd7e18..1cf76e4 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -84,8 +84,8 @@ are not intended to be passed to external codes. #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_vector { - int64_t size; double* restrict data; + int64_t size; } qmckl_vector; #+end_src @@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context, #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { - int64_t size[2]; double* restrict data; + int64_t size[2]; } qmckl_matrix; #+end_src @@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context, #define QMCKL_TENSOR_ORDER_MAX 16 typedef struct qmckl_tensor { + double* restrict data; int64_t order; int64_t size[QMCKL_TENSOR_ORDER_MAX]; - double* restrict data; } qmckl_tensor; #+end_src diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 61062af..1e3c7a9 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -837,7 +837,7 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context, } if (ctx->jastrow.type_nucl_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); + rc = qmckl_free(context, ctx->jastrow.type_nucl_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_type_nucl_vector", @@ -896,7 +896,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context, } if (ctx->jastrow.aord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.aord_vector); + rc = qmckl_free(context, ctx->jastrow.aord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -959,7 +959,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context, } if (ctx->jastrow.bord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.bord_vector); + rc = qmckl_free(context, ctx->jastrow.bord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1029,7 +1029,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context, } if (ctx->jastrow.cord_vector != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.cord_vector); + rc = qmckl_free(context, ctx->jastrow.cord_vector); if (rc != QMCKL_SUCCESS) { return qmckl_failwith( context, rc, "qmckl_set_ord_vector", @@ -1388,12 +1388,11 @@ qmckl_exit_code qmckl_provide_asymp_jasb(qmckl_context context) ctx->jastrow.asymp_jasb = asymp_jasb; } - qmckl_exit_code rc = - qmckl_compute_asymp_jasb(context, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - rescale_factor_kappa_ee, - ctx->jastrow.asymp_jasb); + rc = qmckl_compute_asymp_jasb(context, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + rescale_factor_kappa_ee, + ctx->jastrow.asymp_jasb); if (rc != QMCKL_SUCCESS) { return rc; } @@ -1470,10 +1469,6 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( const double rescale_factor_kappa_ee, double* const asymp_jasb ) { - double kappa_inv, x, asym_one; - - kappa_inv = 1.0 / rescale_factor_kappa_ee; - if (context == QMCKL_NULL_CONTEXT){ return QMCKL_INVALID_CONTEXT; } @@ -1482,14 +1477,15 @@ qmckl_exit_code qmckl_compute_asymp_jasb ( return QMCKL_INVALID_ARG_2; } - asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); + const double kappa_inv = 1.0 / rescale_factor_kappa_ee; + const double asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv); asymp_jasb[0] = asym_one; asymp_jasb[1] = 0.5 * asym_one; for (int i = 0 ; i <= 1; ++i) { - x = kappa_inv; + double x = kappa_inv; for (int p = 1; p < bord_num; ++p){ - x = x * kappa_inv; + x *= kappa_inv; asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x; } } @@ -1672,16 +1668,15 @@ qmckl_exit_code qmckl_provide_factor_ee(qmckl_context context) ctx->jastrow.factor_ee = factor_ee; } - qmckl_exit_code rc = - qmckl_compute_factor_ee(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee); + rc = qmckl_compute_factor_ee(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee); if (rc != QMCKL_SUCCESS) { return rc; } @@ -1990,17 +1985,16 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context) ctx->jastrow.factor_ee_deriv_e = factor_ee_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_ee_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->electron.up_num, - ctx->jastrow.bord_num, - ctx->jastrow.bord_vector, - ctx->electron.ee_distance_rescaled, - ctx->electron.ee_distance_rescaled_deriv_e, - ctx->jastrow.asymp_jasb, - ctx->jastrow.factor_ee_deriv_e); + rc = qmckl_compute_factor_ee_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->electron.up_num, + ctx->jastrow.bord_num, + ctx->jastrow.bord_vector, + ctx->electron.ee_distance_rescaled, + ctx->electron.ee_distance_rescaled_deriv_e, + ctx->jastrow.asymp_jasb, + ctx->jastrow.factor_ee_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -2412,21 +2406,20 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context) ctx->jastrow.factor_en = factor_en; } - qmckl_exit_code rc = - qmckl_compute_factor_en(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->jastrow.factor_en); + rc = qmckl_compute_factor_en(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->jastrow.factor_en); if (rc != QMCKL_SUCCESS) { return rc; } - + ctx->jastrow.factor_en_date = ctx->date; } @@ -2720,18 +2713,17 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context) ctx->jastrow.factor_en_deriv_e = factor_en_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_en_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.aord_num, - ctx->jastrow.aord_vector, - ctx->electron.en_distance_rescaled, - ctx->electron.en_distance_rescaled_deriv_e, - ctx->jastrow.factor_en_deriv_e); + rc = qmckl_compute_factor_en_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.aord_num, + ctx->jastrow.aord_vector, + ctx->electron.en_distance_rescaled, + ctx->electron.en_distance_rescaled_deriv_e, + ctx->jastrow.factor_en_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3138,14 +3130,13 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context) ctx->jastrow.een_rescaled_e = een_rescaled_e; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e); + rc = qmckl_compute_een_rescaled_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3470,16 +3461,15 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_e_deriv_e = een_rescaled_e_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_e_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_ee, - ctx->electron.coord_new.data, - ctx->electron.ee_distance, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_e_deriv_e); + rc = qmckl_compute_factor_een_rescaled_e_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_ee, + ctx->electron.coord_new.data, + ctx->electron.ee_distance, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_e_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -3849,15 +3839,14 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context) ctx->jastrow.een_rescaled_n = een_rescaled_n; } - qmckl_exit_code rc = - qmckl_compute_een_rescaled_n(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n); + rc = qmckl_compute_een_rescaled_n(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4165,18 +4154,17 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context) ctx->jastrow.een_rescaled_n_deriv_e = een_rescaled_n_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_rescaled_n_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->electron.rescale_factor_kappa_en, - ctx->electron.coord_new.data, - ctx->nucleus.coord.data, - ctx->electron.en_distance, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e); + rc = qmckl_compute_factor_een_rescaled_n_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->electron.rescale_factor_kappa_en, + ctx->electron.coord_new.data, + ctx->nucleus.coord.data, + ctx->electron.en_distance, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4666,14 +4654,13 @@ qmckl_exit_code qmckl_provide_cord_vect_full(qmckl_context context) ctx->jastrow.cord_vect_full = cord_vect_full; } - qmckl_exit_code rc = - qmckl_compute_cord_vect_full(context, - ctx->nucleus.num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.type_nucl_num, - ctx->jastrow.type_nucl_vector, - ctx->jastrow.cord_vector, - ctx->jastrow.cord_vect_full); + rc = qmckl_compute_cord_vect_full(context, + ctx->nucleus.num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.type_nucl_num, + ctx->jastrow.type_nucl_vector, + ctx->jastrow.cord_vector, + ctx->jastrow.cord_vect_full); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4717,11 +4704,10 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context) ctx->jastrow.lkpm_combined_index = lkpm_combined_index; } - qmckl_exit_code rc = - qmckl_compute_lkpm_combined_index(context, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.lkpm_combined_index); + rc = qmckl_compute_lkpm_combined_index(context, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.lkpm_combined_index); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4766,15 +4752,14 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) ctx->jastrow.tmp_c = tmp_c; } - qmckl_exit_code rc = - qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -4807,7 +4792,7 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; mem_info.size = (ctx->jastrow.cord_num) * (ctx->jastrow.cord_num + 1) - * 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); + ,* 4 * ctx->electron.num * ctx->nucleus.num * ctx->electron.walk_num * sizeof(double); double* dtmp_c = (double*) qmckl_malloc(context, mem_info); if (dtmp_c == NULL) { @@ -4819,15 +4804,14 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } - qmckl_exit_code rc = - qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5582,18 +5566,17 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context) ctx->jastrow.factor_een = factor_een; } - qmckl_exit_code rc = - qmckl_compute_factor_een(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.factor_een); + rc = qmckl_compute_factor_een(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.factor_een); if (rc != QMCKL_SUCCESS) { return rc; } @@ -6093,20 +6076,19 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context) ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e; } - qmckl_exit_code rc = - qmckl_compute_factor_een_deriv_e(context, - ctx->electron.walk_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->jastrow.cord_num, - ctx->jastrow.dim_cord_vect, - ctx->jastrow.cord_vect_full, - ctx->jastrow.lkpm_combined_index, - ctx->jastrow.tmp_c, - ctx->jastrow.dtmp_c, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.een_rescaled_n_deriv_e, - ctx->jastrow.factor_een_deriv_e); + rc = qmckl_compute_factor_een_deriv_e(context, + ctx->electron.walk_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->jastrow.cord_num, + ctx->jastrow.dim_cord_vect, + ctx->jastrow.cord_vect_full, + ctx->jastrow.lkpm_combined_index, + ctx->jastrow.tmp_c, + ctx->jastrow.dtmp_c, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.een_rescaled_n_deriv_e, + ctx->jastrow.factor_een_deriv_e); if (rc != QMCKL_SUCCESS) { return rc; } diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 66023c0..31d2885 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -896,13 +896,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, } const int64_t n0 = n < 0 ? 0 : n; - for (int64_t n=n0 ; n < nidx ; n+=1) { - const double* restrict ck = coef_normalized_t + idx[n]*mo_num; - const double a1 = av1[n]; - const double a2 = av2[n]; - const double a3 = av3[n]; - const double a4 = av4[n]; - const double a5 = av5[n]; + for (int64_t m=n0 ; m < nidx ; m+=1) { + const double* restrict ck = coef_normalized_t + idx[m]*mo_num; + const double a1 = av1[m]; + const double a2 = av2[m]; + const double a3 = av3[m]; + const double a4 = av4[m]; + const double a5 = av5[m]; #ifdef HAVE_OPENMP #pragma omp simd diff --git a/org/qmckl_sherman_morrison_woodbury.org b/org/qmckl_sherman_morrison_woodbury.org index 598ad32..ae358e8 100644 --- a/org/qmckl_sherman_morrison_woodbury.org +++ b/org/qmckl_sherman_morrison_woodbury.org @@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context, rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant); if (rc != 0) { // Send the entire block to slagel_splitting uint64_t l = 0; - rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, + (void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block, breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant); later = later + l; } From b70dd91db94427cbcc9613d1e6675f8d6885395a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 6 May 2022 00:18:23 +0200 Subject: [PATCH 072/100] Fix bad style --- org/qmckl_mo.org | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 31d2885..026f52c 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -805,6 +805,8 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, const double* restrict ao_vgl, double* restrict const mo_vgl ) { + assert (context != QMCKL_NULL_CONTEXT); + #ifdef HAVE_OPENMP #pragma omp parallel for #endif @@ -837,7 +839,6 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context, double av4[ao_num]; double av5[ao_num]; for (int64_t k=0 ; k Date: Tue, 5 Apr 2022 11:44:17 +0200 Subject: [PATCH 073/100] Fixed cppcheck --- .github/workflows/test-build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index b4e5b6c..12d4503 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -2,7 +2,6 @@ name: test-build on: push: - branches: [ master ] pull_request: jobs: From e54ec07e6bf46b417882ead921391f9058923c77 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 5 Apr 2022 10:50:51 +0200 Subject: [PATCH 074/100] warnings --- org/qmckl_determinant.org | 69 +++++++++++++++++---------------------- org/qmckl_nucleus.org | 37 ++++++++++----------- 2 files changed, 48 insertions(+), 58 deletions(-) diff --git a/org/qmckl_determinant.org b/org/qmckl_determinant.org index 0412db6..8c205fc 100644 --- a/org/qmckl_determinant.org +++ b/org/qmckl_determinant.org @@ -1134,36 +1134,28 @@ end function qmckl_compute_det_vgl_beta_f #+begin_src c :tangle (eval c_test) :exports none -#define walk_num chbrclf_walk_num -#define elec_num chbrclf_elec_num -#define shell_num chbrclf_shell_num -#define ao_num chbrclf_ao_num - -int64_t elec_up_num = chbrclf_elec_up_num; -int64_t elec_dn_num = chbrclf_elec_dn_num; double* elec_coord = &(chbrclf_elec_coord[0][0][0]); -const int64_t nucl_num = chbrclf_nucl_num; const double* nucl_charge = chbrclf_charge; const double* nucl_coord = &(chbrclf_nucl_coord[0][0]); -rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num); +rc = qmckl_set_electron_num (context, chbrclf_elec_up_num, chbrclf_elec_dn_num); assert (rc == QMCKL_SUCCESS); -rc = qmckl_set_electron_walk_num (context, walk_num); +rc = qmckl_set_electron_walk_num (context, chbrclf_walk_num); assert (rc == QMCKL_SUCCESS); assert(qmckl_electron_provided(context)); -rc = qmckl_set_electron_coord (context, 'N', elec_coord, walk_num*elec_num*3); +rc = qmckl_set_electron_coord (context, 'N', elec_coord, chbrclf_walk_num*chbrclf_elec_num*3); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_nucleus_num (context, nucl_num); +rc = qmckl_set_nucleus_num (context, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), nucl_num*3); +rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), chbrclf_nucl_num*3); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_nucleus_charge(context, nucl_charge, nucl_num); +rc = qmckl_set_nucleus_charge(context, nucl_charge, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(qmckl_nucleus_provided(context)); @@ -1195,27 +1187,27 @@ rc = qmckl_set_ao_basis_prim_num (context, chbrclf_prim_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_nucleus_index (context, nucleus_index, nucl_num); +rc = qmckl_set_ao_basis_nucleus_index (context, nucleus_index, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_nucleus_shell_num (context, nucleus_shell_num, nucl_num); +rc = qmckl_set_ao_basis_nucleus_shell_num (context, nucleus_shell_num, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_ang_mom (context, shell_ang_mom, shell_num); +rc = qmckl_set_ao_basis_shell_ang_mom (context, shell_ang_mom, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_factor (context, shell_factor, shell_num); +rc = qmckl_set_ao_basis_shell_factor (context, shell_factor, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_prim_num (context, shell_prim_num, shell_num); +rc = qmckl_set_ao_basis_shell_prim_num (context, shell_prim_num, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); -rc = qmckl_set_ao_basis_shell_prim_index (context, shell_prim_index, shell_num); +rc = qmckl_set_ao_basis_shell_prim_index (context, shell_prim_index, chbrclf_shell_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); @@ -1239,14 +1231,13 @@ assert(rc == QMCKL_SUCCESS); assert(qmckl_ao_basis_provided(context)); -double ao_vgl[walk_num*elec_num][5][chbrclf_ao_num]; +double ao_vgl[chbrclf_walk_num*chbrclf_elec_num][5][chbrclf_ao_num]; -rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*walk_num*elec_num*chbrclf_ao_num); +rc = qmckl_get_ao_basis_ao_vgl(context, &(ao_vgl[0][0][0]), (int64_t) 5*chbrclf_walk_num*chbrclf_elec_num*chbrclf_ao_num); assert (rc == QMCKL_SUCCESS); /* Set up MO data */ -const int64_t mo_num = chbrclf_mo_num; -rc = qmckl_set_mo_basis_mo_num(context, mo_num); +rc = qmckl_set_mo_basis_mo_num(context, chbrclf_mo_num); assert (rc == QMCKL_SUCCESS); const double * mo_coefficient = &(chbrclf_mo_coef[0]); @@ -1256,31 +1247,31 @@ assert (rc == QMCKL_SUCCESS); assert(qmckl_mo_basis_provided(context)); -double mo_vgl[walk_num*elec_num][5][chbrclf_mo_num]; -rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*walk_num*elec_num*chbrclf_mo_num); +double mo_vgl[chbrclf_walk_num*chbrclf_elec_num][5][chbrclf_mo_num]; +rc = qmckl_get_mo_basis_mo_vgl(context, &(mo_vgl[0][0][0]), 5*chbrclf_walk_num*chbrclf_elec_num*chbrclf_mo_num); assert (rc == QMCKL_SUCCESS); /* Set up determinant data */ -const int64_t det_num_alpha = 1; -const int64_t det_num_beta = 1; -int64_t mo_index_alpha[det_num_alpha][walk_num][elec_up_num]; -int64_t mo_index_beta[det_num_alpha][walk_num][elec_dn_num]; +#define det_num_alpha 1 +#define det_num_beta 1 +int64_t mo_index_alpha[det_num_alpha][chbrclf_walk_num][chbrclf_elec_up_num]; +int64_t mo_index_beta[det_num_alpha][chbrclf_walk_num][chbrclf_elec_dn_num]; int i, j, k; for(k = 0; k < det_num_alpha; ++k) - for(i = 0; i < walk_num; ++i) - for(j = 0; j < elec_up_num; ++j) + for(i = 0; i < chbrclf_walk_num; ++i) + for(j = 0; j < chbrclf_elec_up_num; ++j) mo_index_alpha[k][i][j] = j + 1; for(k = 0; k < det_num_beta; ++k) - for(i = 0; i < walk_num; ++i) - for(j = 0; j < elec_up_num; ++j) + for(i = 0; i < chbrclf_walk_num; ++i) + for(j = 0; j < chbrclf_elec_up_num; ++j) mo_index_beta[k][i][j] = j + 1; rc = qmckl_set_determinant_type (context, typ); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_determinant_walk_num (context, walk_num); +rc = qmckl_set_determinant_walk_num (context, chbrclf_walk_num); assert (rc == QMCKL_SUCCESS); rc = qmckl_set_determinant_det_num_alpha (context, det_num_alpha); @@ -1297,8 +1288,8 @@ assert (rc == QMCKL_SUCCESS); // Get slater-determinant -double det_vgl_alpha[det_num_alpha][walk_num][5][elec_up_num][elec_up_num]; -double det_vgl_beta[det_num_beta][walk_num][5][elec_dn_num][elec_dn_num]; +double det_vgl_alpha[det_num_alpha][chbrclf_walk_num][5][chbrclf_elec_up_num][chbrclf_elec_up_num]; +double det_vgl_beta[det_num_beta][chbrclf_walk_num][5][chbrclf_elec_dn_num][chbrclf_elec_dn_num]; rc = qmckl_get_det_vgl_alpha(context, &(det_vgl_alpha[0][0][0][0][0])); assert (rc == QMCKL_SUCCESS); @@ -2047,8 +2038,8 @@ end function qmckl_compute_det_inv_matrix_beta_f #+begin_src c :tangle (eval c_test) :exports none // Get adjoint of the slater-determinant -double det_inv_matrix_alpha[det_num_alpha][walk_num][elec_up_num][elec_up_num]; -double det_inv_matrix_beta[det_num_beta][walk_num][elec_dn_num][elec_dn_num]; +double det_inv_matrix_alpha[det_num_alpha][chbrclf_walk_num][chbrclf_elec_up_num][chbrclf_elec_up_num]; +double det_inv_matrix_beta[det_num_beta][chbrclf_walk_num][chbrclf_elec_dn_num][chbrclf_elec_dn_num]; rc = qmckl_get_det_inv_matrix_alpha(context, &(det_inv_matrix_alpha[0][0][0][0])); assert (rc == QMCKL_SUCCESS); diff --git a/org/qmckl_nucleus.org b/org/qmckl_nucleus.org index 319d5d1..e191d3e 100644 --- a/org/qmckl_nucleus.org +++ b/org/qmckl_nucleus.org @@ -672,7 +672,6 @@ end interface ** Test #+begin_src c :tangle (eval c_test) -const int64_t nucl_num = chbrclf_nucl_num; const double* nucl_charge = chbrclf_charge; const double* nucl_coord = &(chbrclf_nucl_coord[0][0]); const double nucl_rescale_factor_kappa = 2.0; @@ -688,13 +687,13 @@ rc = qmckl_get_nucleus_num (context, &n); assert(rc == QMCKL_NOT_PROVIDED); -rc = qmckl_set_nucleus_num (context, nucl_num); +rc = qmckl_set_nucleus_num (context, chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_nucleus_provided(context)); rc = qmckl_get_nucleus_num (context, &n); assert(rc == QMCKL_SUCCESS); -assert(n == nucl_num); +assert(n == chbrclf_nucl_num); double k; rc = qmckl_get_nucleus_rescale_factor (context, &k); @@ -709,41 +708,41 @@ rc = qmckl_get_nucleus_rescale_factor (context, &k); assert(rc == QMCKL_SUCCESS); assert(k == nucl_rescale_factor_kappa); -double nucl_coord2[3*nucl_num]; +double nucl_coord2[3*chbrclf_nucl_num]; -rc = qmckl_get_nucleus_coord (context, 'T', nucl_coord2, 3*nucl_num); +rc = qmckl_get_nucleus_coord (context, 'T', nucl_coord2, 3*chbrclf_nucl_num); assert(rc == QMCKL_NOT_PROVIDED); -rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), 3*nucl_num); +rc = qmckl_set_nucleus_coord (context, 'T', &(nucl_coord[0]), 3*chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_nucleus_provided(context)); -rc = qmckl_get_nucleus_coord (context, 'N', nucl_coord2, 3*nucl_num); +rc = qmckl_get_nucleus_coord (context, 'N', nucl_coord2, 3*chbrclf_nucl_num); assert(rc == QMCKL_SUCCESS); for (size_t k=0 ; k<3 ; ++k) { - for (int64_t i=0 ; i Date: Fri, 6 May 2022 11:30:21 +0200 Subject: [PATCH 075/100] Fixed bug in AO HPC --- org/qmckl_ao.org | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 8289aa4..03405bb 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -4797,7 +4797,7 @@ qmckl_ao_polynomial_transp_vgl_hpc (const qmckl_context context, const double* restrict X, const double* restrict R, const int32_t lmax, - int64_t* restrict n, + int64_t* n, int32_t* restrict const L, const int64_t ldl, double* restrict const VGL, @@ -5529,7 +5529,7 @@ qmckl_compute_ao_vgl_hpc_gaussian ( { qmckl_exit_code rc; double ar2[prim_max]; - int32_t powers[prim_max]; + int32_t powers[3*size_max]; double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 0.0}, @@ -5614,7 +5614,6 @@ qmckl_compute_ao_vgl_hpc_gaussian ( nucleus_max_ang_mom[inucl], &n_poly, powers, (int64_t) 3, &(poly_vgl[0][0]), size_max); - assert (rc == QMCKL_SUCCESS); break; } From ec7201783fbfd360d054891d7d093ec34577f98e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 10 May 2022 19:18:19 +0200 Subject: [PATCH 076/100] Possibility to compute only values --- org/qmckl_ao.org | 1025 ++++++++++++++++++++++++++++++++++++++++++---- org/qmckl_mo.org | 508 ++++++++++++++++++++++- 2 files changed, 1448 insertions(+), 85 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 03405bb..4120690 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -299,6 +299,8 @@ typedef struct qmckl_ao_basis_struct { uint64_t shell_vgl_date; double * restrict ao_vgl; uint64_t ao_vgl_date; + double * restrict ao_value; + uint64_t ao_value_date; int32_t uninitialized; bool provided; @@ -2490,8 +2492,10 @@ free(ao_factor_test); | ~primitive_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the primitives at current positions | | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | Value, gradients, Laplacian of the primitives at current positions | | ~shell_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions | - | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | Value, gradients, Laplacian of the primitives at current positions | + | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | Value, gradients, Laplacian of the AOs at current positions | | ~ao_vgl_date~ | ~uint64_t~ | Last modification date of Value, gradients, Laplacian of the AOs at current positions | + | ~ao_value~ | ~double[point_num][ao_num]~ | Values of the the AOs at current positions | + | ~ao_value_date~ | ~uint64_t~ | Last modification date of the values of the AOs at current positions | |----------------------+-----------------------------------+----------------------------------------------------------------------------------------------| *** After initialization @@ -3022,7 +3026,7 @@ qmckl_get_ao_basis_ao_vgl (qmckl_context context, end interface #+end_src - Uses the give array to compute the VGL. + Uses the given array to compute the VGL. #+begin_src c :comments org :tangle (eval h_func) :noweb yes qmckl_exit_code @@ -3030,7 +3034,7 @@ qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, double* const ao_vgl, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, @@ -3088,6 +3092,133 @@ qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, end interface #+end_src + + + #+begin_src c :comments org :tangle (eval h_func) :noweb yes +qmckl_exit_code +qmckl_get_ao_basis_ao_value (qmckl_context context, + double* const ao_value, + const int64_t size_max); + #+end_src + + Returns the array of values of the atomic orbitals evaluated at + the current coordinates. See section [[Combining radial and polynomial parts]]. + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_get_ao_basis_ao_value (qmckl_context context, + double* const ao_value, + const int64_t size_max) +{ + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( context, + QMCKL_INVALID_CONTEXT, + "qmckl_get_ao_basis_ao_value", + NULL); + } + + qmckl_exit_code rc; + + rc = qmckl_provide_ao_value(context); + if (rc != QMCKL_SUCCESS) return rc; + + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + assert (ctx != NULL); + + int64_t sze = ctx->ao_basis.ao_num * ctx->point.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_ao_basis_ao_value", + "input array too small"); + } + memcpy(ao_value, ctx->ao_basis.ao_value, (size_t) sze * sizeof(double)); + + return QMCKL_SUCCESS; +} + #+end_src + + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_get_ao_basis_ao_value (context, & + ao_value, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: ao_value(*) + integer (c_int64_t) , intent(in) , value :: size_max + end function qmckl_get_ao_basis_ao_value + end interface + #+end_src + + Uses the given array to compute the value. + + #+begin_src c :comments org :tangle (eval h_func) :noweb yes +qmckl_exit_code +qmckl_get_ao_basis_ao_value_inplace (qmckl_context context, + double* const ao_value, + const int64_t size_max); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_get_ao_basis_ao_value_inplace (qmckl_context context, + double* const ao_value, + const int64_t size_max) +{ + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( context, + QMCKL_INVALID_CONTEXT, + "qmckl_get_ao_basis_ao_value", + NULL); + } + + qmckl_exit_code rc; + + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + assert (ctx != NULL); + + int64_t sze = ctx->ao_basis.ao_num * ctx->point.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_ao_basis_ao_value", + "input array too small"); + } + + rc = qmckl_context_touch(context); + if (rc != QMCKL_SUCCESS) return rc; + + double* old_array = ctx->ao_basis.ao_value; + + ctx->ao_basis.ao_value = ao_value; + + rc = qmckl_provide_ao_value(context); + if (rc != QMCKL_SUCCESS) return rc; + + ctx->ao_basis.ao_value = old_array; + + return QMCKL_SUCCESS; +} + #+end_src + + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_get_ao_basis_ao_value_inplace (context, & + ao_value, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: ao_value(*) + integer (c_int64_t) , intent(in) , value :: size_max + end function qmckl_get_ao_basis_ao_value_inplace + end interface + #+end_src + * Radial part ** General functions for Gaussian basis functions @@ -5285,33 +5416,780 @@ for (int32_t ldl=3 ; ldl<=5 ; ++ldl) { #+end_src * Combining radial and polynomial parts + +** Values only + :PROPERTIES: + :Name: qmckl_compute_ao_value + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: +*** Unoptimized version + #+NAME: qmckl_ao_value_args_doc + | Variable | Type | In/Out | Description | + |-----------------------+-----------------------------------+--------+----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~shell_num~ | ~int64_t~ | in | Number of shells | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | + | ~coord~ | ~double[3][point_num]~ | in | Coordinates | + | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | + | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | + | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | + | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | + | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | + | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | + | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | + | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells | + | ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs | + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_ao_value_doc_f(context, & + ao_num, shell_num, point_num, nucl_num, & + coord, nucl_coord, nucleus_index, nucleus_shell_num, & + nucleus_range, nucleus_max_ang_mom, shell_ang_mom, & + ao_factor, shell_vgl, ao_value) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: ao_num + integer*8 , intent(in) :: shell_num + integer*8 , intent(in) :: point_num + integer*8 , intent(in) :: nucl_num + double precision , intent(in) :: coord(point_num,3) + double precision , intent(in) :: nucl_coord(nucl_num,3) + integer*8 , intent(in) :: nucleus_index(nucl_num) + integer*8 , intent(in) :: nucleus_shell_num(nucl_num) + double precision , intent(in) :: nucleus_range(nucl_num) + integer , intent(in) :: nucleus_max_ang_mom(nucl_num) + integer , intent(in) :: shell_ang_mom(shell_num) + double precision , intent(in) :: ao_factor(ao_num) + double precision , intent(in) :: shell_vgl(shell_num,5,point_num) + double precision , intent(out) :: ao_value(ao_num,point_num) + + double precision :: e_coord(3), n_coord(3) + integer*8 :: n_poly + integer :: l, il, k + integer*8 :: ipoint, inucl, ishell + integer*8 :: ishell_start, ishell_end + integer :: lstart(0:20) + double precision :: x, y, z, r2 + double precision :: cutoff + integer, external :: qmckl_ao_polynomial_vgl_doc_f + + double precision, allocatable :: poly_vgl(:,:) + integer , allocatable :: powers(:,:), ao_index(:) + + allocate(poly_vgl(5,ao_num), powers(3,ao_num), ao_index(ao_num)) + + ! Pre-computed data + do l=0,20 + lstart(l) = l*(l+1)*(l+2)/6 +1 + end do + + k=1 + do inucl=1,nucl_num + ishell_start = nucleus_index(inucl) + 1 + ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + do ishell = ishell_start, ishell_end + l = shell_ang_mom(ishell) + ao_index(ishell) = k + k = k + lstart(l+1) - lstart(l) + end do + end do + info = QMCKL_SUCCESS + + ! Don't compute polynomials when the radial part is zero. + cutoff = -dlog(1.d-12) + + do ipoint = 1, point_num + e_coord(1) = coord(ipoint,1) + e_coord(2) = coord(ipoint,2) + e_coord(3) = coord(ipoint,3) + do inucl=1,nucl_num + n_coord(1) = nucl_coord(inucl,1) + n_coord(2) = nucl_coord(inucl,2) + n_coord(3) = nucl_coord(inucl,3) + + ! Test if the point is in the range of the nucleus + x = e_coord(1) - n_coord(1) + y = e_coord(2) - n_coord(2) + z = e_coord(3) - n_coord(3) + + r2 = x*x + y*y + z*z + + if (r2 > cutoff*nucleus_range(inucl)) then + cycle + end if + + ! Compute polynomials + info = qmckl_ao_polynomial_vgl_doc_f(context, e_coord, n_coord, & + nucleus_max_ang_mom(inucl), n_poly, powers, 3_8, & + poly_vgl, 5_8) + + ! Loop over shells + ishell_start = nucleus_index(inucl) + 1 + ishell_end = nucleus_index(inucl) + nucleus_shell_num(inucl) + do ishell = ishell_start, ishell_end + k = ao_index(ishell) + l = shell_ang_mom(ishell) + do il = lstart(l), lstart(l+1)-1 + ! Value + ao_value(k,ipoint) = & + poly_vgl(1,il) * shell_vgl(ishell,1,ipoint) * ao_factor(k) + k = k+1 + end do + end do + end do + end do + + deallocate(poly_vgl, powers) +end function qmckl_compute_ao_value_doc_f + #+end_src + +*** HPC version + #+NAME: qmckl_ao_value_args_hpc_gaussian + | Variable | Type | In/Out | Description | + |-----------------------+-----------------------------+--------+----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~shell_num~ | ~int64_t~ | in | Number of shells | + | ~prim_num~ | ~int64_t~ | in | Number of primitives | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | + | ~coord~ | ~double[3][point_num]~ | in | Coordinates | + | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | + | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | + | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | + | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | + | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | + | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | + | ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell | + | ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell | + | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | + | ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~ao_value~ | ~double[point_num][ao_num]~ | out | Values of the AOs | + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +#ifdef HAVE_HPC +qmckl_exit_code +qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, + const int64_t ao_num, + const int64_t shell_num, + const int32_t* restrict prim_num_per_nucleus, + const int64_t point_num, + const int64_t nucl_num, + const double* restrict coord, + const double* restrict nucl_coord, + const int64_t* restrict nucleus_index, + const int64_t* restrict nucleus_shell_num, + const double* nucleus_range, + const int32_t* restrict nucleus_max_ang_mom, + const int32_t* restrict shell_ang_mom, + const double* restrict ao_factor, + const qmckl_matrix expo_per_nucleus, + const qmckl_tensor coef_per_nucleus, + double* restrict const ao_value ) +{ + int32_t lstart[32]; + for (int32_t l=0 ; l<32 ; ++l) { + lstart[l] = l*(l+1)*(l+2)/6; + } + + int64_t ao_index[shell_num+1]; + int64_t size_max = 0; + int64_t prim_max = 0; + int64_t shell_max = 0; + { + int64_t k=0; + for (int inucl=0 ; inucl < nucl_num ; ++inucl) { + prim_max = prim_num_per_nucleus[inucl] > prim_max ? + prim_num_per_nucleus[inucl] : prim_max; + shell_max = nucleus_shell_num[inucl] > shell_max ? + nucleus_shell_num[inucl] : shell_max; + const int64_t ishell_start = nucleus_index[inucl]; + const int64_t ishell_end = nucleus_index[inucl] + nucleus_shell_num[inucl]; + for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { + const int l = shell_ang_mom[ishell]; + ao_index[ishell] = k; + k += lstart[l+1] - lstart[l]; + size_max = size_max < lstart[l+1] ? lstart[l+1] : size_max; + } + } + ao_index[shell_num] = ao_num+1; + } + + /* Don't compute polynomials when the radial part is zero. */ + double cutoff = -log(1.e-12); + +#ifdef HAVE_OPENMP +#pragma omp parallel +#endif + { + qmckl_exit_code rc; + double ar2[prim_max]; + int32_t powers[3*size_max]; + double poly_vgl[5*size_max]; + + double exp_mat[prim_max]; + double ce_mat[shell_max]; + + double coef_mat[nucl_num][shell_max][prim_max]; + for (int i=0 ; i cutoff * nucleus_range[inucl]) { + continue; + } + + int64_t n_poly; + switch (nucleus_max_ang_mom[inucl]) { + case 0: + break; + + case 1: + poly_vgl[0] = 0.; + poly_vgl[1] = x; + poly_vgl[2] = y; + poly_vgl[3] = z; + break; + + case 2: + poly_vgl[0] = 0.; + poly_vgl[1] = x; + poly_vgl[2] = y; + poly_vgl[3] = z; + poly_vgl[4] = x*x; + poly_vgl[5] = x*y; + poly_vgl[6] = x*z; + poly_vgl[7] = y*y; + poly_vgl[8] = y*z; + poly_vgl[9] = z*z; + break; + + default: + rc = qmckl_ao_polynomial_transp_vgl_hpc(context, e_coord, n_coord, + nucleus_max_ang_mom[inucl], + &n_poly, powers, (int64_t) 3, + poly_vgl, size_max); + assert (rc == QMCKL_SUCCESS); + break; + } + + /* Compute all exponents */ + + int64_t nidx = 0; + for (int64_t iprim = 0 ; iprim < prim_num_per_nucleus[inucl] ; ++iprim) { + const double v = qmckl_mat(expo_per_nucleus, iprim, inucl) * r2; + if (v <= cutoff) { + ar2[iprim] = v; + ++nidx; + } else { + break; + } + } + + for (int64_t iprim = 0 ; iprim < nidx ; ++iprim) { + exp_mat[iprim] = exp(-ar2[iprim]); + } + + for (int i=0 ; i 0) { + const double* restrict f = ao_factor + k; + const int64_t idx = lstart[l]; + + poly_vgl_1 = &(poly_vgl[idx]); + + switch (n) { + case(1): + ao_value_1[0] = s1 * f[0]; + break; + case (3): +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (int il=0 ; il<3 ; ++il) { + ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; + } + break; + case(6): +#ifdef HAVE_OPENMP +#pragma omp simd +#endif + for (int il=0 ; il<6 ; ++il) { + ao_value_1[il] = poly_vgl_1[il] * s1 * f[il]; + } + break; + default: +#ifdef HAVE_OPENMP +#pragma omp simd simdlen(8) +#endif + for (int il=0 ; ilao_basis.provided) { + return qmckl_failwith( context, + QMCKL_NOT_PROVIDED, + "qmckl_ao_value", + NULL); + } + + /* Compute if necessary */ + if (ctx->point.date > ctx->ao_basis.ao_value_date) { + + qmckl_exit_code rc; + + /* Provide required data */ +#ifndef HAVE_HPC + rc = qmckl_provide_ao_basis_shell_vgl(context); + if (rc != QMCKL_SUCCESS) { + return qmckl_failwith( context, rc, "qmckl_provide_ao_basis_shell_vgl", NULL); + } +#endif + + /* Allocate array */ + if (ctx->ao_basis.ao_value == NULL) { + + qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; + mem_info.size = ctx->ao_basis.ao_num * 5 * ctx->point.num * sizeof(double); + double* ao_value = (double*) qmckl_malloc(context, mem_info); + + if (ao_value == NULL) { + return qmckl_failwith( context, + QMCKL_ALLOCATION_FAILED, + "qmckl_ao_basis_ao_value", + NULL); + } + ctx->ao_basis.ao_value = ao_value; + } + + if (ctx->ao_basis.ao_vgl_date == ctx->point.date) { + + // ao_vgl has been computed at this step: Just copy the data. + + double * v = &(ctx->ao_basis.ao_value[0]); + double * vgl = &(ctx->ao_basis.ao_vgl[0]); + for (int i=0 ; ipoint.num ; ++i) { + for (int k=0 ; kao_basis.ao_num ; ++k) { + v[k] = vgl[k]; + } + v += ctx->ao_basis.ao_num; + vgl += ctx->ao_basis.ao_num * 5; + } + + } else { + +#ifdef HAVE_HPC + if (ctx->ao_basis.type == 'G') { + rc = qmckl_compute_ao_value_hpc_gaussian(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->ao_basis.prim_num_per_nucleus, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.ao_factor, + ctx->ao_basis.expo_per_nucleus, + ctx->ao_basis.coef_per_nucleus, + ctx->ao_basis.ao_value); + /* + } else if (ctx->ao_basis.type == 'S') { + rc = qmck_compute_ao_value_hpc_slater(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->ao_basis.prim_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.shell_prim_index, + ctx->ao_basis.shell_prim_num, + ctx->ao_basis.ao_factor, + ctx->ao_basis.exponent, + ctx->ao_basis.coefficient_normalized, + ctx->ao_basis.ao_value); + ,*/ + } else { + rc = qmckl_compute_ao_value_doc(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.ao_factor, + ctx->ao_basis.shell_vgl, + ctx->ao_basis.ao_value); + } +#else + rc = qmckl_compute_ao_value_doc(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.ao_factor, + ctx->ao_basis.shell_vgl, + ctx->ao_basis.ao_value); +#endif + if (rc != QMCKL_SUCCESS) { + return rc; + } + + } + + ctx->ao_basis.ao_value_date = ctx->date; + } + + return QMCKL_SUCCESS; +} + #+end_src + +**** Test :noexport: + + #+begin_src python :results output :exports none +import numpy as np +from math import sqrt + +h0 = 1.e-4 +def f(a,x,y): + return np.sum( [c * np.exp( -b*(np.linalg.norm(x-y))**2) for b,c in a] ) + +elec_26_w1 = np.array( [ 1.49050402641, 2.90106987953, -1.05920815468 ] ) +elec_15_w2 = np.array( [ -2.20180344582,-1.9113150239, 2.2193744778600002 ] ) +nucl_1 = np.array( [ -2.302574592081335e+00, -3.542027060505035e-01, -5.334129934317614e-02] ) + +#double ao_value[prim_num][5][elec_num]; +x = elec_26_w1 ; y = nucl_1 +a = [( 4.0382999999999998e+02, 1.4732000000000000e-03 * 5.9876577632594533e+04), + ( 1.2117000000000000e+02, 1.2672500000000000e-02 * 7.2836806319891484e+03), + ( 4.6344999999999999e+01, 5.8045100000000002e-02 * 1.3549226646722386e+03), + ( 1.9721000000000000e+01, 1.7051030000000000e-01 * 3.0376315094739988e+02), + ( 8.8623999999999992e+00, 3.1859579999999998e-01 * 7.4924579607137730e+01), + ( 3.9962000000000000e+00, 3.8450230000000002e-01 * 1.8590543353806009e+01), + ( 1.7636000000000001e+00, 2.7377370000000001e-01 * 4.4423176930919421e+00), + ( 7.0618999999999998e-01, 7.4396699999999996e-02 * 8.9541051939952665e-01)] + +norm = sqrt(3.) +# x^2 * g(r) +print ( "[26][0][219] : %25.15e"%(fx(a,x,y)) ) +print ( "[26][1][219] : %25.15e"%(df(a,x,y,1)) ) +print ( "[26][2][219] : %25.15e"%(df(a,x,y,2)) ) +print ( "[26][3][219] : %25.15e"%(df(a,x,y,3)) ) +print ( "[26][4][219] : %25.15e"%(lf(a,x,y)) ) + +print ( "[26][0][220] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[1] - y[1]) )) +print ( "[26][1][220] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[1] - y[1]) + norm*f(a,x,y) * (x[1] - y[1])) ) + +print ( "[26][0][221] : %25.15e"%(norm*f(a,x,y) * (x[0] - y[0]) * (x[2] - y[2])) ) +print ( "[26][1][221] : %25.15e"%(norm*df(a,x,y,1)* (x[0] - y[0]) * (x[2] - y[2]) + norm*f(a,x,y) * (x[2] - y[2])) ) + +print ( "[26][0][222] : %25.15e"%(f(a,x,y) * (x[1] - y[1]) * (x[1] - y[1])) ) +print ( "[26][1][222] : %25.15e"%(df(a,x,y,1)* (x[1] - y[1]) * (x[1] - y[1])) ) + +print ( "[26][0][223] : %25.15e"%(norm*f(a,x,y) * (x[1] - y[1]) * (x[2] - y[2])) ) +print ( "[26][1][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2])) ) + +print ( "[26][0][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) ) +print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) + + #+end_src + + #+RESULTS: + + #+begin_src c :tangle (eval c_test) :exports none +{ +#define walk_num 1 // chbrclf_walk_num +#define elec_num chbrclf_elec_num +#define shell_num chbrclf_shell_num +#define ao_num chbrclf_ao_num + +int64_t elec_up_num = chbrclf_elec_up_num; +int64_t elec_dn_num = chbrclf_elec_dn_num; +double* elec_coord = &(chbrclf_elec_coord[0][0][0]); + +rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num); +assert (rc == QMCKL_SUCCESS); + +rc = qmckl_set_electron_walk_num (context, walk_num); +assert (rc == QMCKL_SUCCESS); + +assert(qmckl_electron_provided(context)); + +rc = qmckl_set_electron_coord (context, 'N', elec_coord, walk_num*elec_num*3); +assert(rc == QMCKL_SUCCESS); + + +double ao_value[elec_num][ao_num]; + +rc = qmckl_get_ao_basis_ao_value(context, &(ao_value[0][0]), + (int64_t) elec_num*ao_num); +assert (rc == QMCKL_SUCCESS); + +printf("\n"); +printf(" ao_value ao_value[26][219] %25.15e\n", ao_value[26][219]); +printf(" ao_value ao_value[26][220] %25.15e\n", ao_value[26][220]); +printf(" ao_value ao_value[26][221] %25.15e\n", ao_value[26][221]); +printf(" ao_value ao_value[26][222] %25.15e\n", ao_value[26][222]); +printf(" ao_value ao_value[26][223] %25.15e\n", ao_value[26][223]); +printf(" ao_value ao_value[26][224] %25.15e\n", ao_value[26][224]); +printf("\n"); + +assert( fabs(ao_value[26][219] - ( 1.020298798341620e-08)) < 1.e-14 ); +assert( fabs(ao_value[26][220] - ( 1.516643537739178e-08)) < 1.e-14 ); +assert( fabs(ao_value[26][221] - ( -4.686370882518819e-09)) < 1.e-14 ); +assert( fabs(ao_value[26][222] - ( 7.514816980753531e-09)) < 1.e-14 ); +assert( fabs(ao_value[26][223] - ( -4.021908374204471e-09)) < 1.e-14 ); +assert( fabs(ao_value[26][224] - ( 7.175045873560788e-10)) < 1.e-14 ); + +} + + #+end_src + +** Value, gradients, Laplacian :PROPERTIES: :Name: qmckl_compute_ao_vgl :CRetType: qmckl_exit_code :FRetType: qmckl_exit_code :END: +*** Unoptimized version + #+NAME: qmckl_ao_vgl_args_doc + | Variable | Type | In/Out | Description | + |-----------------------+-----------------------------------+--------+----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~shell_num~ | ~int64_t~ | in | Number of shells | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | + | ~coord~ | ~double[3][point_num]~ | in | Coordinates | + | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | + | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | + | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | + | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | + | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | + | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | + | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | + | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells | + | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | -** Unoptimized version - #+NAME: qmckl_ao_vgl_args_doc - | Variable | Type | In/Out | Description | - |-----------------------+-----------------------------------+--------+----------------------------------------------| - | ~context~ | ~qmckl_context~ | in | Global state | - | ~ao_num~ | ~int64_t~ | in | Number of AOs | - | ~shell_num~ | ~int64_t~ | in | Number of shells | - | ~point_num~ | ~int64_t~ | in | Number of points | - | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | - | ~coord~ | ~double[3][point_num]~ | in | Coordinates | - | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | - | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | - | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | - | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | - | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | - | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | - | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | - | ~shell_vgl~ | ~double[point_num][5][shell_num]~ | in | Value, gradients and Laplacian of the shells | - | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | - - #+begin_src f90 :comments org :tangle (eval f) :noweb yes + #+begin_src f90 :comments org :tangle (eval f) :noweb yes integer function qmckl_compute_ao_vgl_doc_f(context, & ao_num, shell_num, point_num, nucl_num, & coord, nucl_coord, nucleus_index, nucleus_shell_num, & @@ -5443,34 +6321,34 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & deallocate(poly_vgl, powers) end function qmckl_compute_ao_vgl_doc_f - #+end_src + #+end_src -** HPC version - #+NAME: qmckl_ao_vgl_args_hpc_gaussian - | Variable | Type | In/Out | Description | - |-----------------------+--------------------------------+--------+----------------------------------------------| - | ~context~ | ~qmckl_context~ | in | Global state | - | ~ao_num~ | ~int64_t~ | in | Number of AOs | - | ~shell_num~ | ~int64_t~ | in | Number of shells | - | ~prim_num~ | ~int64_t~ | in | Number of primitives | - | ~point_num~ | ~int64_t~ | in | Number of points | - | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | - | ~coord~ | ~double[3][point_num]~ | in | Coordinates | - | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | - | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | - | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | - | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | - | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | - | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | - | ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell | - | ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell | - | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | - | ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | - | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | - | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | +*** HPC version + #+NAME: qmckl_ao_vgl_args_hpc_gaussian + | Variable | Type | In/Out | Description | + |-----------------------+--------------------------------+--------+----------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~shell_num~ | ~int64_t~ | in | Number of shells | + | ~prim_num~ | ~int64_t~ | in | Number of primitives | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~nucl_num~ | ~int64_t~ | in | Number of nuclei | + | ~coord~ | ~double[3][point_num]~ | in | Coordinates | + | ~nucl_coord~ | ~double[3][nucl_num]~ | in | Nuclear coordinates | + | ~nucleus_index~ | ~int64_t[nucl_num]~ | in | Index of the 1st shell of each nucleus | + | ~nucleus_shell_num~ | ~int64_t[nucl_num]~ | in | Number of shells per nucleus | + | ~nucleus_range~ | ~double[nucl_num]~ | in | Range beyond which all is zero | + | ~nucleus_max_ang_mom~ | ~int32_t[nucl_num]~ | in | Maximum angular momentum per nucleus | + | ~shell_ang_mom~ | ~int32_t[shell_num]~ | in | Angular momentum of each shell | + | ~shell_prim_index~ | ~int64_t[shell_num]~ | in | Index of the 1st primitive of each shell | + | ~shell_prim_num~ | ~int64_t[shell_num]~ | in | Number of primitives per shell | + | ~ao_factor~ | ~double[ao_num]~ | in | Normalization factor of the AOs | + | ~ao_expo~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~coef_normalized~ | ~double[prim_num]~ | in | Value, gradients and Laplacian of the shells | + | ~ao_vgl~ | ~double[point_num][5][ao_num]~ | out | Value, gradients and Laplacian of the AOs | - #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none #ifdef HAVE_HPC qmckl_exit_code qmckl_compute_ao_vgl_hpc_gaussian ( @@ -5784,14 +6662,13 @@ qmckl_compute_ao_vgl_hpc_gaussian ( return QMCKL_SUCCESS; } #endif - #+end_src + #+end_src -** Interfaces -# #+CALL: generate_c_header(table=qmckl_ao_vgl_args_doc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl")) -# (Commented because the header needs to go into h_private_func) +*** Interfaces + # #+CALL: generate_c_header(table=qmckl_ao_vgl_args_doc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl")) + # (Commented because the header needs to go into h_private_func) - #+RESULTS: - #+begin_src c :tangle (eval h_private_func) :comments org + #+begin_src c :tangle (eval h_private_func) :comments org qmckl_exit_code qmckl_compute_ao_vgl_doc ( const qmckl_context context, const int64_t ao_num, @@ -5808,8 +6685,9 @@ qmckl_compute_ao_vgl_hpc_gaussian ( const double* ao_factor, const double* shell_vgl, double* const ao_vgl ); - #+end_src - #+begin_src c :tangle (eval h_private_func) :comments org + #+end_src + + #+begin_src c :tangle (eval h_private_func) :comments org #ifdef HAVE_HPC qmckl_exit_code qmckl_compute_ao_vgl_hpc_gaussian ( const qmckl_context context, @@ -5830,12 +6708,12 @@ qmckl_compute_ao_vgl_hpc_gaussian ( const qmckl_tensor coef_per_nucleus, double* const ao_vgl ); #endif - #+end_src + #+end_src - #+CALL: generate_c_interface(table=qmckl_ao_vgl_args_doc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl_doc")) + #+CALL: generate_c_interface(table=qmckl_ao_vgl_args_doc,rettyp=get_value("CRetType"),fname="qmckl_compute_ao_vgl_doc")) - #+RESULTS: - #+begin_src f90 :tangle (eval f) :comments org :exports none + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none integer(c_int32_t) function qmckl_compute_ao_vgl_doc & (context, & ao_num, & @@ -5892,16 +6770,15 @@ qmckl_compute_ao_vgl_hpc_gaussian ( ao_vgl) end function qmckl_compute_ao_vgl_doc - #+end_src + #+end_src +**** Provide :noexport: -*** Provide :noexport: - - #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context); - #+end_src + #+end_src - #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) { @@ -6034,11 +6911,11 @@ qmckl_exit_code qmckl_provide_ao_vgl(qmckl_context context) return QMCKL_SUCCESS; } - #+end_src + #+end_src -*** Test :noexport: +**** Test :noexport: - #+begin_src python :results output :exports none + #+begin_src python :results output :exports none import numpy as np from math import sqrt @@ -6107,11 +6984,11 @@ print ( "[26][1][223] : %25.15e"%(norm*df(a,x,y,1)* (x[1] - y[1]) * (x[2] - y[2] print ( "[26][0][224] : %25.15e"%(f(a,x,y) * (x[2] - y[2]) * (x[2] - y[2])) ) print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) - #+end_src + #+end_src - #+RESULTS: + #+RESULTS: - #+begin_src c :tangle (eval c_test) :exports none + #+begin_src c :tangle (eval c_test) :exports none { #define walk_num 1 // chbrclf_walk_num #define elec_num chbrclf_elec_num @@ -6206,7 +7083,7 @@ assert( fabs(ao_vgl[26][4][224] - ( 3.153244195820293e-08)) < 1.e-14 ); } - #+end_src + #+end_src * End of files :noexport: diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index ac7fdb9..4a148c6 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -92,10 +92,12 @@ int main() { Computed data: - |---------------+--------------------------+-------------------------------------------------------------------------------------| - | ~mo_vgl~ | ~[point_num][5][mo_num]~ | Value, gradients, Laplacian of the MOs at point positions | - | ~mo_vgl_date~ | ~uint64_t~ | Late modification date of Value, gradients, Laplacian of the MOs at point positions | - |---------------+--------------------------+-------------------------------------------------------------------------------------| + |-----------------+--------------------------+-------------------------------------------------------------------------------------| + | ~mo_value~ | ~[point_num][mo_num]~ | Value of the MOs at point positions | + | ~mo_value_date~ | ~uint64_t~ | Late modification date of the value of the MOs at point positions | + | ~mo_vgl~ | ~[point_num][5][mo_num]~ | Value, gradients, Laplacian of the MOs at point positions | + | ~mo_vgl_date~ | ~uint64_t~ | Late modification date of Value, gradients, Laplacian of the MOs at point positions | + |-----------------+--------------------------+-------------------------------------------------------------------------------------| ** Data structure @@ -106,7 +108,9 @@ typedef struct qmckl_mo_basis_struct { double * restrict coefficient_t; double * restrict mo_vgl; + double * restrict mo_value; uint64_t mo_vgl_date; + uint64_t mo_value_date; int32_t uninitialized; bool provided; @@ -418,7 +422,464 @@ qmckl_exit_code qmckl_finalize_mo_basis(qmckl_context context) { * Computation -** Computation of MOs +** Computation of MOs: values only + +*** Get + + #+begin_src c :comments org :tangle (eval h_func) :noweb yes +qmckl_exit_code +qmckl_get_mo_basis_mo_value(qmckl_context context, + double* const mo_value, + const int64_t size_max); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_get_mo_basis_mo_value(qmckl_context context, + double* const mo_value, + const int64_t size_max) +{ + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return QMCKL_NULL_CONTEXT; + } + + qmckl_exit_code rc; + + rc = qmckl_provide_ao_value(context); + if (rc != QMCKL_SUCCESS) return rc; + + rc = qmckl_provide_mo_value(context); + if (rc != QMCKL_SUCCESS) return rc; + + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + assert (ctx != NULL); + + const int64_t sze = ctx->point.num * ctx->mo_basis.mo_num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_mo_basis_mo_value", + "input array too small"); + } + memcpy(mo_value, ctx->mo_basis.mo_value, sze * sizeof(double)); + + return QMCKL_SUCCESS; +} + #+end_src + + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_get_mo_basis_mo_value (context, & + mo_value, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: mo_value(*) + integer (c_int64_t) , intent(in) , value :: size_max + end function qmckl_get_mo_basis_mo_value + end interface + #+end_src + + Uses the given array to compute the values. + + #+begin_src c :comments org :tangle (eval h_func) :noweb yes +qmckl_exit_code +qmckl_get_mo_basis_mo_value_inplace (qmckl_context context, + double* const mo_value, + const int64_t size_max); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_get_mo_basis_mo_value_inplace (qmckl_context context, + double* const mo_value, + const int64_t size_max) +{ + + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return qmckl_failwith( context, + QMCKL_INVALID_CONTEXT, + "qmckl_get_mo_basis_mo_value", + NULL); + } + + qmckl_exit_code rc; + + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + assert (ctx != NULL); + + const int64_t sze = ctx->mo_basis.mo_num * ctx->point.num; + if (size_max < sze) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_3, + "qmckl_get_mo_basis_mo_value", + "input array too small"); + } + + rc = qmckl_context_touch(context); + if (rc != QMCKL_SUCCESS) return rc; + + double* old_array = ctx->mo_basis.mo_value; + + ctx->mo_basis.mo_value = mo_value; + + rc = qmckl_provide_mo_value(context); + if (rc != QMCKL_SUCCESS) return rc; + + ctx->mo_basis.mo_value = old_array; + + return QMCKL_SUCCESS; +} + #+end_src + + #+begin_src f90 :tangle (eval fh_func) :comments org :exports none + interface + integer(c_int32_t) function qmckl_get_mo_basis_mo_value_inplace (context, & + mo_value, size_max) bind(C) + use, intrinsic :: iso_c_binding + import + implicit none + integer (c_int64_t) , intent(in) , value :: context + double precision, intent(out) :: mo_value(*) + integer (c_int64_t) , intent(in) , value :: size_max + end function qmckl_get_mo_basis_mo_value_inplace + end interface + #+end_src + +*** Provide + + #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none +qmckl_exit_code qmckl_provide_mo_value(qmckl_context context); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code qmckl_provide_mo_value(qmckl_context context) +{ + + qmckl_exit_code rc; + if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { + return QMCKL_NULL_CONTEXT; + } + + qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + assert (ctx != NULL); + + if (!ctx->ao_basis.provided) { + return qmckl_failwith( context, + QMCKL_NOT_PROVIDED, + "qmckl_ao_basis", + NULL); + } + + rc = qmckl_provide_ao_value(context); + if (rc != QMCKL_SUCCESS) { + return qmckl_failwith( context, + QMCKL_NOT_PROVIDED, + "qmckl_ao_value", + NULL); + } + + if (!ctx->mo_basis.provided) { + return qmckl_failwith( context, + QMCKL_NOT_PROVIDED, + "qmckl_mo_basis", + NULL); + } + + /* Compute if necessary */ + if (ctx->point.date > ctx->mo_basis.mo_value_date) { + + /* Allocate array */ + if (ctx->mo_basis.mo_value == NULL) { + + qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; + mem_info.size = ctx->point.num * ctx->mo_basis.mo_num * sizeof(double); + double* mo_value = (double*) qmckl_malloc(context, mem_info); + + if (mo_value == NULL) { + return qmckl_failwith( context, + QMCKL_ALLOCATION_FAILED, + "qmckl_mo_basis_mo_value", + NULL); + } + ctx->mo_basis.mo_value = mo_value; + } + + if (ctx->mo_basis.mo_vgl_date == ctx->point.date) { + + // mo_vgl has been computed at this step: Just copy the data. + + double * v = &(ctx->mo_basis.mo_value[0]); + double * vgl = &(ctx->mo_basis.mo_vgl[0]); + for (int i=0 ; ipoint.num ; ++i) { + for (int k=0 ; kmo_basis.mo_num ; ++k) { + v[k] = vgl[k]; + } + v += ctx->mo_basis.mo_num; + vgl += ctx->mo_basis.mo_num * 5; + } + + } else { + + rc = qmckl_compute_mo_basis_mo_value(context, + ctx->ao_basis.ao_num, + ctx->mo_basis.mo_num, + ctx->point.num, + ctx->mo_basis.coefficient_t, + ctx->ao_basis.ao_value, + ctx->mo_basis.mo_value); + + if (rc != QMCKL_SUCCESS) { + return rc; + } + + } + + ctx->mo_basis.mo_value_date = ctx->date; + } + + return QMCKL_SUCCESS; +} + #+end_src + +*** Compute + :PROPERTIES: + :Name: qmckl_compute_mo_basis_mo_value + :CRetType: qmckl_exit_code + :FRetType: qmckl_exit_code + :END: + + #+NAME: qmckl_mo_basis_mo_value_args + | Variable | Type | In/Out | Description | + |---------------------+-----------------------------+--------+-------------------------------------------------| + | ~context~ | ~qmckl_context~ | in | Global state | + | ~ao_num~ | ~int64_t~ | in | Number of AOs | + | ~mo_num~ | ~int64_t~ | in | Number of MOs | + | ~point_num~ | ~int64_t~ | in | Number of points | + | ~coef_normalized_t~ | ~double[mo_num][ao_num]~ | in | Transpose of the AO to MO transformation matrix | + | ~ao_value~ | ~double[point_num][ao_num]~ | in | Value of the AOs | + | ~mo_value~ | ~double[point_num][mo_num]~ | out | Value of the MOs | + + + The matrix of AO values is very sparse, so we use a sparse-dense + matrix multiplication instead of a dgemm, as exposed in + https://dx.doi.org/10.1007/978-3-642-38718-0_14. + + + + #+begin_src f90 :comments org :tangle (eval f) :noweb yes +integer function qmckl_compute_mo_basis_mo_value_doc_f(context, & + ao_num, mo_num, point_num, & + coef_normalized_t, ao_value, mo_value) & + result(info) + use qmckl + implicit none + integer(qmckl_context), intent(in) :: context + integer*8 , intent(in) :: ao_num, mo_num + integer*8 , intent(in) :: point_num + double precision , intent(in) :: ao_value(ao_num,point_num) + double precision , intent(in) :: coef_normalized_t(mo_num,ao_num) + double precision , intent(out) :: mo_value(mo_num,point_num) + integer*8 :: i,j,k + double precision :: c1, c2, c3, c4, c5 + + integer*8 :: LDA, LDB, LDC + + info = QMCKL_SUCCESS + if (.True.) then ! fast algorithm + do j=1,point_num + mo_value(:,j) = 0.d0 + do k=1,ao_num + if (ao_value(k,j) /= 0.d0) then + c1 = ao_value(k,j) + do i=1,mo_num + mo_value(i,j) = mo_value(i,j) + coef_normalized_t(i,k) * c1 + end do + end if + end do + end do + + else ! dgemm + + LDA = size(coef_normalized_t,1) + LDB = size(ao_value,1) + LDC = size(mo_value,1) + + info = qmckl_dgemm(context,'N', 'N', mo_num, point_num, ao_num, 1.d0, & + coef_normalized_t, LDA, ao_value, LDB, & + 0.d0, mo_value, LDC) + + end if + +end function qmckl_compute_mo_basis_mo_value_doc_f + #+end_src + + #+CALL: generate_c_header(table=qmckl_mo_basis_mo_value_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_value")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_mo_basis_mo_value ( + const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_value, + double* const mo_value ); + #+end_src + + #+CALL: generate_c_header(table=qmckl_mo_basis_mo_value_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_value_doc")) + + #+RESULTS: + #+begin_src c :tangle (eval h_func) :comments org + qmckl_exit_code qmckl_compute_mo_basis_mo_value_doc ( + const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_value, + double* const mo_value ); + #+end_src + + #+CALL: generate_c_interface(table=qmckl_mo_basis_mo_value_args,rettyp=get_value("CRetType"),fname="qmckl_compute_mo_basis_mo_value_doc")) + + #+RESULTS: + #+begin_src f90 :tangle (eval f) :comments org :exports none + integer(c_int32_t) function qmckl_compute_mo_basis_mo_value_doc & + (context, ao_num, mo_num, point_num, coef_normalized_t, ao_value, mo_value) & + bind(C) result(info) + + use, intrinsic :: iso_c_binding + implicit none + + integer (c_int64_t) , intent(in) , value :: context + integer (c_int64_t) , intent(in) , value :: ao_num + integer (c_int64_t) , intent(in) , value :: mo_num + integer (c_int64_t) , intent(in) , value :: point_num + real (c_double ) , intent(in) :: coef_normalized_t(ao_num,mo_num) + real (c_double ) , intent(in) :: ao_value(ao_num,point_num) + real (c_double ) , intent(out) :: mo_value(mo_num,point_num) + + integer(c_int32_t), external :: qmckl_compute_mo_basis_mo_value_doc_f + info = qmckl_compute_mo_basis_mo_value_doc_f & + (context, ao_num, mo_num, point_num, coef_normalized_t, ao_value, mo_value) + + end function qmckl_compute_mo_basis_mo_value_doc + #+end_src + + #+begin_src c :tangle (eval c) :comments org +qmckl_exit_code +qmckl_compute_mo_basis_mo_value (const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_value, + double* const mo_value ) +{ +#ifdef HAVE_HPC + return qmckl_compute_mo_basis_mo_value_hpc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_value, mo_value); +#else + return qmckl_compute_mo_basis_mo_value_doc (context, ao_num, mo_num, point_num, coef_normalized_t, ao_value, mo_value); +#endif +} + #+end_src + +*** HPC version + + + #+begin_src c :tangle (eval h_func) :comments org +#ifdef HAVE_HPC +qmckl_exit_code +qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* coef_normalized_t, + const double* ao_value, + double* const mo_value ); +#endif + #+end_src + + #+begin_src c :tangle (eval c) :comments org +#ifdef HAVE_HPC +qmckl_exit_code +qmckl_compute_mo_basis_mo_value_hpc (const qmckl_context context, + const int64_t ao_num, + const int64_t mo_num, + const int64_t point_num, + const double* restrict coef_normalized_t, + const double* restrict ao_value, + double* restrict const mo_value ) +{ + assert (context != QMCKL_NULL_CONTEXT); + +#ifdef HAVE_OPENMP + #pragma omp parallel for +#endif + for (int64_t ipoint=0 ; ipoint < point_num ; ++ipoint) { + double* restrict const vgl1 = &(mo_value[ipoint*mo_num]); + const double* restrict avgl1 = &(ao_value[ipoint*ao_num]); + + for (int64_t i=0 ; i Date: Fri, 20 May 2022 19:22:56 +0200 Subject: [PATCH 077/100] Fix tests --- org/examples.org | 224 +++++++++++++++++++++++++++---------- org/qmckl_local_energy.org | 2 +- 2 files changed, 167 insertions(+), 59 deletions(-) diff --git a/org/examples.org b/org/examples.org index 58dc366..c1002b2 100644 --- a/org/examples.org +++ b/org/examples.org @@ -3,17 +3,124 @@ #+INCLUDE: ../tools/lib.org In this section, we present examples of usage of QMCkl. -For simplicity, we assume that the wave function parameters are stores +For simplicity, we assume that the wave function parameters are stored in a [[https://github.com/TREX-CoE/trexio][TREXIO]] file. -* Checking errors +* Python - All QMCkl functions return an error code. A convenient way to handle - errors is to write an error-checking function that displays the - error in text format and exits the program. +** Check numerically that MOs are orthonormal - #+NAME: qmckl_check_error - #+begin_src f90 + In this example, we will compute the numerically the overlap + between the molecular orbitals: + + \[ + S_{ij} = \int \phi_i(\mathbf{r}) \phi_j(\mathbf{r}) + \text{d}\mathbf{r} \sim \sum_{k=1}^{N} \phi_i(\mathbf{r}_k) + \phi_j(\mathbf{r}_k) \delta \mathbf{r} + \] + + + #+begin_src python :session +import numpy as np +import qmckl + #+end_src + + #+RESULTS: + + + First, we create a context for the QMCkl calculation, and load the + wave function stored in =h2o_5z.h5= inside it: + + #+begin_src python :session +trexio_filename = "..//share/qmckl/test_data/h2o_5z.h5" + +context = qmckl.context_create() +qmckl.trexio_read(context, trexio_filename) + #+end_src + + #+RESULTS: + : None + + We now define the grid points as a regular grid around the + molecule. + + We fetch the nuclear coordinates from the context, + + #+begin_src python :session :results output +nucl_num = qmckl.get_nucleus_num(context) + +nucl_charge = qmckl.get_nucleus_charge(context, nucl_num) + +nucl_coord = qmckl.get_nucleus_coord(context, 'N', nucl_num*3) +nucl_coord = np.reshape(nucl_coord, (3, nucl_num)) + +for i in range(nucl_num): + print("%d %+f %+f %+f"%(int(nucl_charge[i]), + nucl_coord[i,0], + nucl_coord[i,1], + nucl_coord[i,2]) ) + #+end_src + + #+RESULTS: + : 8 +0.000000 +0.000000 +0.000000 + : 1 -1.430429 +0.000000 -1.107157 + : 1 +1.430429 +0.000000 -1.107157 + + and compute the coordinates of the grid points: + + #+begin_src python :session +nx = ( 40, 40, 40 ) +point_num = nx[0] * nx[1] * nx[2] + +rmin = np.array( list([ np.min(nucl_coord[:,a]) for a in range(3) ]) ) +rmax = np.array( list([ np.max(nucl_coord[:,a]) for a in range(3) ]) ) + +shift = np.array([5.,5.,5.]) + +linspace = [ None for i in range(3) ] +step = [ None for i in range(3) ] +for a in range(3): + linspace[a], step[a] = np.linspace(rmin[a]-shift[a], + rmax[a]+shift[a], + num=nx[a], + retstep=True) + +dr = step[0] * step[1] * step[2] +dr + #+end_src + + #+RESULTS: + : 0.024081249137090373 + + Now the grid is ready, we can create the list of grid points on + which the MOs will be evaluated, and transfer them to the QMCkl + context: + + #+begin_src python :session +point = [] +for x in linspace[0]: + for y in linspace[1]: + for z in linspace[2]: + point += [x, y, z] + +#point = np.array(point) +qmckl.set_point(context, 'N', point, len(point)/3) + #+end_src + + #+RESULTS: + + Then, will first evaluate all the MOs at the grid points, and then we will + compute the overlap between all the MOs. + +* Fortran +** Checking errors + + All QMCkl functions return an error code. A convenient way to handle + errors is to write an error-checking function that displays the + error in text format and exits the program. + + #+NAME: qmckl_check_error + #+begin_src f90 subroutine qmckl_check_error(rc, message) use qmckl implicit none @@ -27,28 +134,28 @@ subroutine qmckl_check_error(rc, message) call exit(rc) end if end subroutine qmckl_check_error - #+end_src + #+end_src -* Computing an atomic orbital on a grid - :PROPERTIES: - :header-args: :tangle ao_grid.f90 - :END: +** Computing an atomic orbital on a grid + :PROPERTIES: + :header-args: :tangle ao_grid.f90 + :END: - The following program, in Fortran, computes the values of an atomic - orbital on a regular 3-dimensional grid. The 100^3 grid points are - automatically defined, such that the molecule fits in a box with 5 - atomic units in the borders. + The following program, in Fortran, computes the values of an atomic + orbital on a regular 3-dimensional grid. The 100^3 grid points are + automatically defined, such that the molecule fits in a box with 5 + atomic units in the borders. - This program uses the ~qmckl_check_error~ function defined above. + This program uses the ~qmckl_check_error~ function defined above. - To use this program, run + To use this program, run - #+begin_src bash :tangle no + #+begin_src bash :tangle no $ ao_grid - #+end_src + #+end_src - #+begin_src f90 :noweb yes + #+begin_src f90 :noweb yes <> program ao_grid @@ -73,11 +180,11 @@ program ao_grid double precision :: rmin(3), rmax(3) double precision, allocatable :: points(:,:) double precision, allocatable :: ao_vgl(:,:,:) - #+end_src + #+end_src - Start by fetching the command-line arguments: + Start by fetching the command-line arguments: - #+begin_src f90 + #+begin_src f90 if (iargc() /= 3) then print *, 'Syntax: ao_grid ' call exit(-1) @@ -92,21 +199,21 @@ program ao_grid print *, 'Error: 0 < point_num < 300' call exit(-1) end if - #+end_src + #+end_src - Create the QMCkl context and initialize it with the wave function - present in the TREXIO file: + Create the QMCkl context and initialize it with the wave function + present in the TREXIO file: - #+begin_src f90 + #+begin_src f90 qmckl_ctx = qmckl_context_create() rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename))) call qmckl_check_error(rc, 'Read TREXIO') - #+end_src + #+end_src - We need to check that ~ao_id~ is in the range, so we get the total - number of AOs from QMCkl: + We need to check that ~ao_id~ is in the range, so we get the total + number of AOs from QMCkl: - #+begin_src f90 + #+begin_src f90 rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) call qmckl_check_error(rc, 'Getting ao_num') @@ -114,24 +221,24 @@ program ao_grid print *, 'Error: 0 < ao_id < ', ao_num call exit(-1) end if - #+end_src + #+end_src - Now we will compute the limits of the box in which the molecule fits. - For that, we first need to ask QMCkl the coordinates of nuclei. + Now we will compute the limits of the box in which the molecule fits. + For that, we first need to ask QMCkl the coordinates of nuclei. - #+begin_src f90 + #+begin_src f90 rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num) call qmckl_check_error(rc, 'Get nucleus num') allocate( nucl_coord(3, nucl_num) ) rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num) call qmckl_check_error(rc, 'Get nucleus coord') - #+end_src + #+end_src - We now compute the coordinates of opposite points of the box, and - the distance between points along the 3 directions: + We now compute the coordinates of opposite points of the box, and + the distance between points along the 3 directions: - #+begin_src f90 + #+begin_src f90 rmin(1) = minval( nucl_coord(1,:) ) - 5.d0 rmin(2) = minval( nucl_coord(2,:) ) - 5.d0 rmin(3) = minval( nucl_coord(3,:) ) - 5.d0 @@ -141,12 +248,12 @@ program ao_grid rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0 dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1) - #+end_src + #+end_src - We now produce the list of point coordinates where the AO will be - evaluated: + We now produce the list of point coordinates where the AO will be + evaluated: - #+begin_src f90 + #+begin_src f90 point_num = point_num_x**3 allocate( points(point_num, 3) ) ipoint=0 @@ -166,34 +273,35 @@ program ao_grid end do z = z + dr(3) end do - #+end_src + #+end_src - We give the points to QMCkl: + We give the points to QMCkl: - #+begin_src f90 + #+begin_src f90 rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) call qmckl_check_error(rc, 'Setting points') - #+end_src + #+end_src - We allocate the space required to retrieve the values, gradients and - Laplacian of all AOs, and ask to retrieve the values of the - AOs computed at the point positions. + We allocate the space required to retrieve the values, gradients and + Laplacian of all AOs, and ask to retrieve the values of the + AOs computed at the point positions. - #+begin_src f90 + #+begin_src f90 allocate( ao_vgl(ao_num, 5, point_num) ) rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) call qmckl_check_error(rc, 'Setting points') - #+end_src + #+end_src - We finally print the value of the AO: + We finally print the value of the AO: - #+begin_src f90 + #+begin_src f90 do ipoint=1, point_num print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) end do - #+end_src + #+end_src - #+begin_src f90 + #+begin_src f90 deallocate( nucl_coord, points, ao_vgl ) end program ao_grid - #+end_src + #+end_src + diff --git a/org/qmckl_local_energy.org b/org/qmckl_local_energy.org index 1ee61e0..9dcc715 100644 --- a/org/qmckl_local_energy.org +++ b/org/qmckl_local_energy.org @@ -1289,7 +1289,7 @@ end function qmckl_compute_local_energy_f double local_energy[chbrclf_walk_num]; -rc = qmckl_get_local_energy(context, &(local_energy[0]), walk_num); +rc = qmckl_get_local_energy(context, &(local_energy[0]), chbrclf_walk_num); assert (rc == QMCKL_SUCCESS); #+end_src From ce1aeb324d45a745a4b5b7313741249b7197e363 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 20 May 2022 19:57:01 +0200 Subject: [PATCH 078/100] Change point API to make it consistent for Python --- org/examples.org | 6 +++--- org/qmckl_electron.org | 2 +- org/qmckl_point.org | 20 +++++++++++++++----- python/src/qmckl.i | 4 ++++ 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/org/examples.org b/org/examples.org index c1002b2..eef87b4 100644 --- a/org/examples.org +++ b/org/examples.org @@ -101,10 +101,10 @@ point = [] for x in linspace[0]: for y in linspace[1]: for z in linspace[2]: - point += [x, y, z] + point += [ [x, y, z] ] -#point = np.array(point) -qmckl.set_point(context, 'N', point, len(point)/3) +point = np.array(point) +qmckl.set_point(context, 'N', len(point), point) #+end_src #+RESULTS: diff --git a/org/qmckl_electron.org b/org/qmckl_electron.org index c0c0254..a4736cc 100644 --- a/org/qmckl_electron.org +++ b/org/qmckl_electron.org @@ -718,7 +718,7 @@ qmckl_set_electron_coord(qmckl_context context, ctx->electron.coord_old = ctx->electron.coord_new ; qmckl_exit_code rc; - rc = qmckl_set_point(context, transp, coord, size_max/3); + rc = qmckl_set_point(context, transp, size_max/3, coord, size_max); assert (rc == QMCKL_SUCCESS); ctx->electron.coord_new = ctx->point.coord ; diff --git a/org/qmckl_point.org b/org/qmckl_point.org index 74a1e6a..306502f 100644 --- a/org/qmckl_point.org +++ b/org/qmckl_point.org @@ -263,8 +263,9 @@ end interface #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_point (qmckl_context context, const char transp, + const int64_t num, const double* coord, - const int64_t num); + const int64_t size_max); #+end_src Copy a sequence of ~num~ points $(x,y,z)$ into the context. @@ -273,14 +274,22 @@ qmckl_exit_code qmckl_set_point (qmckl_context context, qmckl_exit_code qmckl_set_point (qmckl_context context, const char transp, + const int64_t num, const double* coord, - const int64_t num) + const int64_t size_max) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return QMCKL_NULL_CONTEXT; } + if (size_max < 3*num) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_4, + "qmckl_set_point", + "Array too small"); + } + if (transp != 'N' && transp != 'T') { return qmckl_failwith( context, QMCKL_INVALID_ARG_2, @@ -349,7 +358,7 @@ qmckl_set_point (qmckl_context context, #+begin_src f90 :comments org :tangle (eval fh_func) :noweb yes interface integer(c_int32_t) function qmckl_set_point(context, & - transp, coord, num) bind(C) + transp, num, coord, size_max) bind(C) use, intrinsic :: iso_c_binding import implicit none @@ -358,6 +367,7 @@ interface character(c_char) , intent(in) , value :: transp real (c_double ) , intent(in) :: coord(*) integer (c_int64_t) , intent(in) , value :: num + integer (c_int64_t) , intent(in) , value :: size_max end function end interface #+end_src @@ -380,7 +390,7 @@ double coord3[point_num*3]; rc = qmckl_get_point (context, 'N', coord2, (point_num*3)); assert(rc == QMCKL_NOT_PROVIDED); -rc = qmckl_set_point (context, 'N', coord, point_num); +rc = qmckl_set_point (context, 'N', point_num, coord, (point_num*3)); assert(rc == QMCKL_SUCCESS); int64_t n; @@ -404,7 +414,7 @@ for (int64_t i=0 ; i Date: Fri, 20 May 2022 23:20:06 +0200 Subject: [PATCH 079/100] Added Python example. --- org/examples.org | 101 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 24 deletions(-) diff --git a/org/examples.org b/org/examples.org index eef87b4..8650c40 100644 --- a/org/examples.org +++ b/org/examples.org @@ -7,16 +7,20 @@ For simplicity, we assume that the wave function parameters are stored in a [[https://github.com/TREX-CoE/trexio][TREXIO]] file. * Python - ** Check numerically that MOs are orthonormal - In this example, we will compute the numerically the overlap + In this example, we will compute numerically the overlap between the molecular orbitals: \[ S_{ij} = \int \phi_i(\mathbf{r}) \phi_j(\mathbf{r}) \text{d}\mathbf{r} \sim \sum_{k=1}^{N} \phi_i(\mathbf{r}_k) - \phi_j(\mathbf{r}_k) \delta \mathbf{r} + \phi_j(\mathbf{r}_k) \delta \mathbf{r} + \] + \[ + S_{ij} = \langle \phi_i | \phi_j \rangle + \sim \sum_{k=1}^{N} \langle \phi_i | \mathbf{r}_k \rangle + \langle \mathbf{r}_k | \phi_j \rangle \] @@ -27,9 +31,9 @@ import qmckl #+RESULTS: - First, we create a context for the QMCkl calculation, and load the - wave function stored in =h2o_5z.h5= inside it: + wave function stored in =h2o_5z.h5= inside it. It is a Hartree-Fock + determinant for the water molecule in the cc-pV5Z basis set. #+begin_src python :session trexio_filename = "..//share/qmckl/test_data/h2o_5z.h5" @@ -41,12 +45,12 @@ qmckl.trexio_read(context, trexio_filename) #+RESULTS: : None - We now define the grid points as a regular grid around the + We now define the grid points $\mathbf{r}_k$ as a regular grid around the molecule. We fetch the nuclear coordinates from the context, - #+begin_src python :session :results output + #+begin_src python :session :results output :export both nucl_num = qmckl.get_nucleus_num(context) nucl_charge = qmckl.get_nucleus_charge(context, nucl_num) @@ -61,21 +65,22 @@ for i in range(nucl_num): nucl_coord[i,2]) ) #+end_src - #+RESULTS: - : 8 +0.000000 +0.000000 +0.000000 - : 1 -1.430429 +0.000000 -1.107157 - : 1 +1.430429 +0.000000 -1.107157 + #+begin_example +8 +0.000000 +0.000000 +0.000000 +1 -1.430429 +0.000000 -1.107157 +1 +1.430429 +0.000000 -1.107157 + #+end_example and compute the coordinates of the grid points: #+begin_src python :session -nx = ( 40, 40, 40 ) +nx = ( 120, 120, 120 ) +shift = np.array([5.,5.,5.]) point_num = nx[0] * nx[1] * nx[2] rmin = np.array( list([ np.min(nucl_coord[:,a]) for a in range(3) ]) ) rmax = np.array( list([ np.max(nucl_coord[:,a]) for a in range(3) ]) ) -shift = np.array([5.,5.,5.]) linspace = [ None for i in range(3) ] step = [ None for i in range(3) ] @@ -86,15 +91,13 @@ for a in range(3): retstep=True) dr = step[0] * step[1] * step[2] -dr #+end_src #+RESULTS: - : 0.024081249137090373 - Now the grid is ready, we can create the list of grid points on - which the MOs will be evaluated, and transfer them to the QMCkl - context: + Now the grid is ready, we can create the list of grid points + $\mathbf{r}_k$ on which the MOs $\phi_i$ will be evaluated, and + transfer them to the QMCkl context: #+begin_src python :session point = [] @@ -104,13 +107,63 @@ for x in linspace[0]: point += [ [x, y, z] ] point = np.array(point) -qmckl.set_point(context, 'N', len(point), point) +point_num = len(point) +qmckl.set_point(context, 'N', point_num, np.reshape(point, (point_num*3))) #+end_src #+RESULTS: + : None - Then, will first evaluate all the MOs at the grid points, and then we will - compute the overlap between all the MOs. + Then, we evaluate all the MOs at the grid points (and time the execution), + and thus obtain the matrix $M_{ki} = \langle \mathbf{r}_k | \phi_i \rangle = + \phi_i(\mathbf{r}_k)$. + + #+begin_src python :session :results output :export both +import time + +mo_num = qmckl.get_mo_basis_mo_num(context) + +before = time.time() +mo_value = qmckl.get_mo_basis_mo_value(context, point_num*mo_num) +after = time.time() + +mo_value = np.reshape( mo_value, (point_num, mo_num) ) + +print("Number of MOs: ", mo_num) +print("Number of grid points: ", point_num) +print("Execution time : ", (after - before), "seconds") + + #+end_src + + #+begin_example +Number of MOs: 201 +Number of grid points: 1728000 +Execution time : 3.511528968811035 seconds + #+end_example + + and finally we compute the overlap between all the MOs as + $M^\dagger M$. + + #+begin_src python :session :results output +overlap = mo_value.T @ mo_value * dr +print (overlap) + #+end_src + + #+begin_example + [[ 9.88693941e-01 2.34719693e-03 -1.50518232e-08 ... 3.12084178e-09 + -5.81064929e-10 3.70130091e-02] + [ 2.34719693e-03 9.99509628e-01 3.18930040e-09 ... -2.46888958e-10 + -1.06064273e-09 -7.65567973e-03] + [-1.50518232e-08 3.18930040e-09 9.99995073e-01 ... -5.84882580e-06 + -1.21598117e-06 4.59036468e-08] + ... + [ 3.12084178e-09 -2.46888958e-10 -5.84882580e-06 ... 1.00019107e+00 + -2.03342837e-04 -1.36954855e-08] + [-5.81064929e-10 -1.06064273e-09 -1.21598117e-06 ... -2.03342837e-04 + 9.99262427e-01 1.18264754e-09] + [ 3.70130091e-02 -7.65567973e-03 4.59036468e-08 ... -1.36954855e-08 + 1.18264754e-09 8.97215950e-01]] + #+end_example * Fortran ** Checking errors @@ -278,7 +331,7 @@ program ao_grid We give the points to QMCkl: #+begin_src f90 - rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) + rc = qmckl_set_point(qmckl_ctx, 'T', point_num, points, size(points)*1_8 ) call qmckl_check_error(rc, 'Setting points') #+end_src @@ -292,11 +345,11 @@ program ao_grid call qmckl_check_error(rc, 'Setting points') #+end_src - We finally print the value of the AO: + We finally print the value and Laplacian of the AO: #+begin_src f90 do ipoint=1, point_num - print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) + print '(3(F10.6,X),2(E20.10,X))', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint), ao_vgl(ao_id,5,ipoint) end do #+end_src From 222574e9859e6fea3e6ccacbcbfc138bd8dbf8ed Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 3 Jun 2022 15:38:02 +0200 Subject: [PATCH 080/100] transa -> transb: Thanks to Axel Auweter ;-) --- org/qmckl_distance.org | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/org/qmckl_distance.org b/org/qmckl_distance.org index 6d3900e..2160ed7 100644 --- a/org/qmckl_distance.org +++ b/org/qmckl_distance.org @@ -133,7 +133,7 @@ integer function qmckl_distance_sq_f(context, transa, transb, m, n, & if (transb == 'N' .or. transb == 'n') then continue - else if (transa == 'T' .or. transa == 't') then + else if (transb == 'T' .or. transb == 't') then transab = transab + 2 else transab = -100 @@ -533,7 +533,7 @@ integer function qmckl_distance_f(context, transa, transb, m, n, & if (transb == 'N' .or. transb == 'n') then continue - else if (transa == 'T' .or. transa == 't') then + else if (transb == 'T' .or. transb == 't') then transab = transab + 2 else transab = -100 @@ -1314,7 +1314,7 @@ integer function qmckl_distance_rescaled_deriv_e_f(context, transa, transb, m, n if (transb == 'N' .or. transb == 'n') then continue - else if (transa == 'T' .or. transa == 't') then + else if (transb == 'T' .or. transb == 't') then transab = transab + 2 else transab = -100 From 2784e894d473cc96eeb8f4a8ea3253de993e6ce1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 7 Jun 2022 15:55:48 +0200 Subject: [PATCH 081/100] python -> python3 --- autogen.sh | 2 +- org/qmckl_jastrow.org | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/autogen.sh b/autogen.sh index 588976a..018116f 100755 --- a/autogen.sh +++ b/autogen.sh @@ -1,5 +1,5 @@ #!/bin/bash export srcdir="." -python ${srcdir}/tools/build_makefile.py +python3 ${srcdir}/tools/build_makefile.py autoreconf -i -Wall --no-recursive diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 3007181..5fe060a 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -141,7 +141,7 @@ int main() { | ~bord_num~ | ~int64_t~ | in | The number of b coeffecients | | ~cord_num~ | ~int64_t~ | in | The number of c coeffecients | | ~type_nucl_num~ | ~int64_t~ | in | Number of Nucleii types | - | ~type_nucl_vector~ | ~int64_t[nucl_num]~ | in | IDs of types of Nucleii | + | ~type_nucl_vector~ | ~int64_t[nucl_num]~ | in | IDs of types of Nuclei | | ~aord_vector~ | ~double[aord_num + 1][type_nucl_num]~ | in | Order of a polynomial coefficients | | ~bord_vector~ | ~double[bord_num + 1]~ | in | Order of b polynomial coefficients | | ~cord_vector~ | ~double[cord_num][type_nucl_num]~ | in | Order of c polynomial coefficients | From 07e1e44f0538ded21b98bc56469ebb08628e8bb5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 11 Jun 2022 10:57:58 +0200 Subject: [PATCH 082/100] Include assembly in qmckl_ao --- org/qmckl_ao.org | 100 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 87 insertions(+), 13 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 4120690..b1734de 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -3034,7 +3034,7 @@ qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, double* const ao_vgl, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, @@ -3093,7 +3093,7 @@ qmckl_get_ao_basis_ao_vgl_inplace (qmckl_context context, #+end_src - + #+begin_src c :comments org :tangle (eval h_func) :noweb yes qmckl_exit_code qmckl_get_ao_basis_ao_value (qmckl_context context, @@ -3161,7 +3161,7 @@ qmckl_get_ao_basis_ao_value_inplace (qmckl_context context, double* const ao_value, const int64_t size_max); #+end_src - + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_get_ao_basis_ao_value_inplace (qmckl_context context, @@ -6521,20 +6521,94 @@ qmckl_compute_ao_vgl_hpc_gaussian ( exp_mat[iprim][4] = f * (3.0 - 2.0 * ar2[iprim]); } + +/* --- */ for (int i=0 ; i Date: Tue, 14 Jun 2022 22:30:33 +0200 Subject: [PATCH 083/100] Switch for asm in AOs --- org/qmckl_ao.org | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index b1734de..1e10ccf 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -6521,8 +6521,25 @@ qmckl_compute_ao_vgl_hpc_gaussian ( exp_mat[iprim][4] = f * (3.0 - 2.0 * ar2[iprim]); } - + /* --- */ + switch (8) { + case(5): + + for (int i=0 ; i Date: Wed, 15 Jun 2022 11:11:11 +0200 Subject: [PATCH 084/100] [CI] Disable debugging on MacOS --- .github/workflows/test-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 4bfb193..c85a10e 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -121,7 +121,7 @@ jobs: run: | export PKG_CONFIG_PATH=${PWD}/trexio/_install/lib/pkgconfig:$PKG_CONFIG_PATH ./autogen.sh - ./configure CC=gcc-10 FC=gfortran-10 --enable-silent-rules --enable-debug + ./configure CC=gcc-10 FC=gfortran-10 --enable-silent-rules make -j 4 - name: Run test From d21adadcf2f9d86c8fdd1aca5e0b8dfccd67c696 Mon Sep 17 00:00:00 2001 From: Evgeny Posenitskiy <45995097+q-posev@users.noreply.github.com> Date: Wed, 15 Jun 2022 11:36:39 +0200 Subject: [PATCH 085/100] [CI] Disable the broker MacOS CI --- .github/workflows/test-build.yml | 114 +++++++++++++++---------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index c85a10e..16b32ce 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -76,60 +76,60 @@ jobs: run: make python-test working-directory: _build - x86_macos: - - runs-on: macos-latest - name: x86 MacOS latest - - steps: - - uses: actions/checkout@v2 - - name: install dependencies - run: brew install emacs hdf5 automake pkg-config - - - name: Symlink gfortran (macOS) - if: runner.os == 'macOS' - run: | - # make sure gfortran is available - # https://github.com/actions/virtual-environments/issues/2524 - # https://github.com/cbg-ethz/dce/blob/master/.github/workflows/pkgdown.yaml - sudo ln -s /usr/local/bin/gfortran-10 /usr/local/bin/gfortran - sudo mkdir /usr/local/gfortran - sudo ln -s /usr/local/Cellar/gcc@10/*/lib/gcc/10 /usr/local/gfortran/lib - gfortran --version - - - name: Install the latest TREXIO from the GitHub clone - run: | - git clone https://github.com/TREX-CoE/trexio.git - cd trexio - ./autogen.sh - ./configure --prefix=${PWD}/_install --enable-silent-rules - make -j 4 - make install - - - name: Test TREXIO - run: make -j 4 check - working-directory: trexio - - - name: Archive TREXIO test log file - if: failure() - uses: actions/upload-artifact@v2 - with: - name: test-report-trexio-macos - path: trexio/test-suite.log - - - name: Build QMCkl - run: | - export PKG_CONFIG_PATH=${PWD}/trexio/_install/lib/pkgconfig:$PKG_CONFIG_PATH - ./autogen.sh - ./configure CC=gcc-10 FC=gfortran-10 --enable-silent-rules - make -j 4 - - - name: Run test - run: make -j 4 check - - - name: Archive test log file - if: failure() - uses: actions/upload-artifact@v2 - with: - name: test-report-macos - path: test-suite.log +# x86_macos: +# +# runs-on: macos-latest +# name: x86 MacOS latest +# +# steps: +# - uses: actions/checkout@v2 +# - name: install dependencies +# run: brew install emacs hdf5 automake pkg-config +# +# - name: Symlink gfortran (macOS) +# if: runner.os == 'macOS' +# run: | +# # make sure gfortran is available +# # https://github.com/actions/virtual-environments/issues/2524 +# # https://github.com/cbg-ethz/dce/blob/master/.github/workflows/pkgdown.yaml +# sudo ln -s /usr/local/bin/gfortran-10 /usr/local/bin/gfortran +# sudo mkdir /usr/local/gfortran +# sudo ln -s /usr/local/Cellar/gcc@10/*/lib/gcc/10 /usr/local/gfortran/lib +# gfortran --version +# +# - name: Install the latest TREXIO from the GitHub clone +# run: | +# git clone https://github.com/TREX-CoE/trexio.git +# cd trexio +# ./autogen.sh +# ./configure --prefix=${PWD}/_install --enable-silent-rules +# make -j 4 +# make install +# +# - name: Test TREXIO +# run: make -j 4 check +# working-directory: trexio +# +# - name: Archive TREXIO test log file +# if: failure() +# uses: actions/upload-artifact@v2 +# with: +# name: test-report-trexio-macos +# path: trexio/test-suite.log +# +# - name: Build QMCkl +# run: | +# export PKG_CONFIG_PATH=${PWD}/trexio/_install/lib/pkgconfig:$PKG_CONFIG_PATH +# ./autogen.sh +# ./configure CC=gcc-10 FC=gfortran-10 --enable-silent-rules +# make -j 4 +# +# - name: Run test +# run: make -j 4 check +# +# - name: Archive test log file +# if: failure() +# uses: actions/upload-artifact@v2 +# with: +# name: test-report-macos +# path: test-suite.log From b5423dca3db1eee9bca227d069458791cdfd68dd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 15 Jun 2022 16:12:33 +0200 Subject: [PATCH 086/100] Add missing fortran line --- org/qmckl_mo.org | 1 + 1 file changed, 1 insertion(+) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 4a148c6..bdc6383 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -262,6 +262,7 @@ bool qmckl_mo_basis_provided(const qmckl_context context) { *** Fortran interfaces + #+begin_src f90 :tangle (eval fh_func) :comments org interface integer(c_int32_t) function qmckl_get_mo_basis_mo_num (context, & mo_num) bind(C) From 3b6a85d40467479dbf3543cf47d4e879615189ab Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 15 Jun 2022 16:20:12 +0200 Subject: [PATCH 087/100] Fix typo --- org/qmckl_mo.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index bdc6383..d44cee3 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -282,7 +282,7 @@ interface implicit none integer (c_int64_t) , intent(in) , value :: context double precision, intent(out) :: coefficient(*) - integer (c_int64_t) , intent(int), value :: size_max + integer (c_int64_t) , intent(in), value :: size_max end function qmckl_get_mo_basis_coefficient end interface From 1b6cf47f0dd81c155bff1b2e3fa04cb5b4f0f64e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 15 Jun 2022 23:21:31 +0200 Subject: [PATCH 088/100] Fixed bug in HPC version of AOs --- org/qmckl_ao.org | 80 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 43ca5bc..b53e4f7 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -3528,7 +3528,7 @@ integer function qmckl_compute_ao_basis_primitive_gaussian_vgl_f( & info = QMCKL_SUCCESS ! Don't compute exponentials when the result will be almost zero. - cutoff = -dlog(1.d-15) + cutoff = 27.631021115928547 ! -dlog(1.d-12) do inucl=1,nucl_num ! C is zero-based, so shift bounds by one @@ -3752,10 +3752,15 @@ print ( "[7][4][26] : %e"% lf(a,x,y)) (int64_t) 5*elec_num*walk_num*prim_num ); assert (rc == QMCKL_SUCCESS); + printf("prim_vgl[26][0][7] = %e\n",prim_vgl[26][0][7]); assert( fabs(prim_vgl[26][0][7] - ( 1.0501570432064878E-003)) < 1.e-14 ); + printf("prim_vgl[26][1][7] = %e\n",prim_vgl[26][1][7]); assert( fabs(prim_vgl[26][1][7] - (-7.5014974095310560E-004)) < 1.e-14 ); + printf("prim_vgl[26][2][7] = %e\n",prim_vgl[26][2][7]); assert( fabs(prim_vgl[26][2][7] - (-3.8250692897610380E-003)) < 1.e-14 ); + printf("prim_vgl[26][3][7] = %e\n",prim_vgl[26][3][7]); assert( fabs(prim_vgl[26][3][7] - ( 3.4950559194080275E-003)) < 1.e-14 ); + printf("prim_vgl[26][4][7] = %e\n",prim_vgl[26][4][7]); assert( fabs(prim_vgl[26][4][7] - ( 2.0392163767356572E-002)) < 1.e-14 ); } @@ -3875,7 +3880,7 @@ integer function qmckl_compute_ao_basis_shell_gaussian_vgl_f( & ! Don't compute exponentials when the result will be almost zero. ! TODO : Use numerical precision here - cutoff = -dlog(1.d-12) + cutoff = 27.631021115928547 !-dlog(1.d-12) do ipoint = 1, point_num @@ -5501,7 +5506,7 @@ integer function qmckl_compute_ao_value_doc_f(context, & info = QMCKL_SUCCESS ! Don't compute polynomials when the radial part is zero. - cutoff = -dlog(1.d-12) + cutoff = 27.631021115928547 !-dlog(1.d-12) do ipoint = 1, point_num e_coord(1) = coord(ipoint,1) @@ -5622,7 +5627,7 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context, } /* Don't compute polynomials when the radial part is zero. */ - double cutoff = -log(1.e-12); + double cutoff = 27.631021115928547; // -log(1.e-12) #ifdef HAVE_OPENMP #pragma omp parallel @@ -6247,7 +6252,7 @@ integer function qmckl_compute_ao_vgl_doc_f(context, & info = QMCKL_SUCCESS ! Don't compute polynomials when the radial part is zero. - cutoff = -dlog(1.d-12) + cutoff = 27.631021115928547 ! -dlog(1.d-12) do ipoint = 1, point_num e_coord(1) = coord(ipoint,1) @@ -6398,8 +6403,8 @@ qmckl_compute_ao_vgl_hpc_gaussian ( ao_index[shell_num] = ao_num+1; } - /* Don't compute polynomials when the radial part is zero. */ - double cutoff = -log(1.e-12); + /* Don't compute when the radial part is zero. */ + double cutoff = 27.631021115928547; // -log(1.e-12) #ifdef HAVE_OPENMP #pragma omp parallel @@ -6637,11 +6642,6 @@ qmckl_compute_ao_vgl_hpc_gaussian ( for (int64_t ishell = ishell_start ; ishell < ishell_end ; ++ishell) { const double s1 = ce_mat[ishell-ishell_start][0]; - if (s1 == 0.0) continue; - const double s2 = ce_mat[ishell-ishell_start][1]; - const double s3 = ce_mat[ishell-ishell_start][2]; - const double s4 = ce_mat[ishell-ishell_start][3]; - const double s5 = ce_mat[ishell-ishell_start][4]; const int64_t k = ao_index[ishell]; double* restrict const ao_vgl_1 = ao_vgl + ipoint*5*ao_num + k; @@ -6654,6 +6654,22 @@ qmckl_compute_ao_vgl_hpc_gaussian ( double* restrict const ao_vgl_4 = ao_vgl_1 + (ao_num<<1) + ao_num; double* restrict const ao_vgl_5 = ao_vgl_1 + (ao_num<<2); + if (s1 == 0.0) { + for (int64_t il=0 ; ilao_basis.expo_per_nucleus, ctx->ao_basis.coef_per_nucleus, ctx->ao_basis.ao_vgl); + +/* DEBUG + rc = qmckl_provide_ao_basis_shell_vgl(context); + if (rc != QMCKL_SUCCESS) { + return qmckl_failwith( context, rc, "qmckl_provide_ao_basis_shell_vgl", NULL); + } + int64_t K= ctx->ao_basis.ao_num * 5 * ctx->point.num; + double* check = malloc(K*sizeof(double)); + rc = qmckl_compute_ao_vgl_doc(context, + ctx->ao_basis.ao_num, + ctx->ao_basis.shell_num, + ctx->point.num, + ctx->nucleus.num, + ctx->point.coord.data, + ctx->nucleus.coord.data, + ctx->ao_basis.nucleus_index, + ctx->ao_basis.nucleus_shell_num, + ctx->ao_basis.nucleus_range, + ctx->ao_basis.nucleus_max_ang_mom, + ctx->ao_basis.shell_ang_mom, + ctx->ao_basis.ao_factor, + ctx->ao_basis.shell_vgl, + check); + for (int64_t i=0 ; iao_basis.ao_vgl[i]) > 1.e-10) { + int a, b, c; + a = i/(ctx->ao_basis.ao_num*5); + b = (i-a*ctx->ao_basis.ao_num*5)/ctx->ao_basis.ao_num; + c = (i-a*ctx->ao_basis.ao_num*5 -b*ctx->ao_basis.ao_num); + printf("%d: %d, %d, %d, %e %e\n", i, a, b, c, check[i], ctx->ao_basis.ao_vgl[i]); + } + } +*/ + /* } else if (ctx->ao_basis.type == 'S') { rc = qmck_compute_ao_vgl_hpc_slater(context, From a9902740ec00368f39dfdbdb28cf8f86d21e6f79 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 16 Jun 2022 17:17:48 +0200 Subject: [PATCH 089/100] Update README.md QMCkl logo --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 703bfee..e212dd7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # QMCkl: Quantum Monte Carlo Kernel Library + + ![Build Status](https://github.com/TREX-CoE/qmckl/workflows/test-build/badge.svg?branch=master) The domain of quantum chemistry needs a library in which the main From 49e535feb97758efbcd924e8578c920639aa4e16 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 29 Jun 2022 10:34:20 +0200 Subject: [PATCH 090/100] Moved python in Makefile.am --- Makefile.am | 54 +++++++++++++++++++++++++++++----------------------- configure.ac | 2 +- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/Makefile.am b/Makefile.am index c782ac6..ea302d3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -93,6 +93,36 @@ html-local: $(htmlize_el) $(dist_html_DATA) text: $(htmlize_el) $(dist_text_DATA) doc: html text +setup_py = $(srcdir)/python/setup.py +process_header_py = $(srcdir)/python/src/process_header.py +test_py = $(srcdir)/python/test/test_api.py +qmckl_i = $(srcdir)/python/src/qmckl.i +numpy_i = $(srcdir)/python/src/numpy.i +qmckl_wrap_c = python/src/qmckl_wrap.c +qmckl_include_i = python/src/qmckl_include.i +qmckl_py = python/src/qmckl.py + +dist_python_DATA = $(setup_py) $(qmckl_py) $(qmckl_wrap_c) \ + $(srcdir)/python/pyproject.toml \ + $(srcdir)/python/requirements.txt \ + $(srcdir)/python/README.md + +python-install: $(qmckl_h) $(lib_LTLIBRARIES) $(dist_python_DATA) + $(MKDIR_P) python/src + cd python ; \ + [[ ! -f pyproject.toml ]] && \ + cp $(abs_srcdir)/python/{pyproject.toml,requirements.txt,README.md,setup.py} . ; \ + cp src/qmckl.py . ; \ + export QMCKL_INCLUDEDIR="$(prefix)/include" ; \ + export QMCKL_LIBDIR="$(prefix)/lib" ; \ + pip install . + +python-test: $(test_py) + cd $(abs_srcdir)/python/test/ && \ + python test_api.py + +.PHONY: python-test python-install cppcheck + if QMCKL_DEVEL @@ -170,15 +200,6 @@ cppcheck.out: $(qmckl_h) --language=c --std=c99 -rp --platform=unix64 \ -I$(srcdir)/include -I$(top_builddir)/include *.c *.h 2>../$@ -setup_py = $(srcdir)/python/setup.py -process_header_py = $(srcdir)/python/src/process_header.py -test_py = $(srcdir)/python/test/test_api.py -qmckl_i = $(srcdir)/python/src/qmckl.i -numpy_i = $(srcdir)/python/src/numpy.i -qmckl_wrap_c = python/src/qmckl_wrap.c -qmckl_include_i = python/src/qmckl_include.i -qmckl_py = python/qmckl/qmckl.py - $(qmckl_include_i): $(qmckl_h) $(process_header_py) $(MKDIR_P) python/src python $(process_header_py) $(qmckl_h) @@ -190,24 +211,9 @@ $(qmckl_py): $(qmckl_i) $(qmckl_include_i) $(qmckl_wrap_c): $(qmckl_py) -python-install: $(qmckl_h) $(qmckl_i) $(setup_py) $(qmckl_py) $(qmckl_wrap_c) - $(MKDIR_P) python/src - cd python ; \ - [[ ! -f pyproject.toml ]] && \ - cp $(abs_srcdir)/python/{pyproject.toml,requirements.txt,README.md,setup.py} . ; \ - cp src/qmckl.py . ; \ - export QMCKL_INCLUDEDIR="$(prefix)/include" ; \ - export QMCKL_LIBDIR="$(prefix)/lib" ; \ - pip install . - -python-test: $(test_py) - cd $(abs_srcdir)/python/test/ && \ - python test_api.py CLEANFILES += $(qmckl_wrap_c) \ $(qmckl_include_i) \ $(qmckl_py) -.PHONY: cppcheck python-test python-install - endif diff --git a/configure.ac b/configure.ac index ca736be..e8e09c6 100644 --- a/configure.ac +++ b/configure.ac @@ -35,7 +35,7 @@ AC_PREREQ([2.69]) -AC_INIT([qmckl],[0.1.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html]) +AC_INIT([qmckl],[0.2.1],[https://github.com/TREX-CoE/qmckl/issues],[],[https://trex-coe.github.io/qmckl/index.html]) AC_CONFIG_AUX_DIR([tools]) AM_INIT_AUTOMAKE([subdir-objects color-tests parallel-tests silent-rules 1.11]) From d5fcd2e0fec9872e7aefe0da2b7ca0cd74ba8940 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 29 Jun 2022 13:31:58 +0200 Subject: [PATCH 091/100] Intrinsics for AOs and alignment --- org/qmckl_ao.org | 186 ++++++++++++++++++++++------------------------- 1 file changed, 85 insertions(+), 101 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index b53e4f7..2f02ce8 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -111,6 +111,10 @@ int main() { #include "qmckl_memory_private_func.h" #include "qmckl_ao_private_type.h" #include "qmckl_ao_private_func.h" + +#ifdef HAVE_HPC +#include +#endif #+end_src * Context @@ -6411,23 +6415,25 @@ qmckl_compute_ao_vgl_hpc_gaussian ( #endif { qmckl_exit_code rc; - double ar2[prim_max]; - int32_t powers[3*size_max]; - double poly_vgl_l1[4][4] = {{1.0, 0.0, 0.0, 0.0}, + double ar2[prim_max] __attribute__((aligned(64))); + int32_t powers[3*size_max] __attribute__((aligned(64))); + double poly_vgl_l1[4][4] __attribute__((aligned(64))) = + {{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 0.0}, {0.0, 0.0, 0.0, 1.0}}; - double poly_vgl_l2[5][10] = {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, + double poly_vgl_l2[5][10]__attribute__((aligned(64))) = + {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, {0., 0., 1., 0., 0., 0., 0., 0., 0., 0.}, {0., 0., 0., 1., 0., 0., 0., 0., 0., 0.}, {0., 0., 0., 0., 2., 0., 0., 2., 0., 2.}}; - double poly_vgl[5][size_max]; + double poly_vgl[5][size_max] __attribute__((aligned(64))); - double exp_mat[prim_max][8]; - double ce_mat[shell_max][8]; + double exp_mat[prim_max][8] __attribute__((aligned(64))) ; + double ce_mat[shell_max][8] __attribute__((aligned(64))) ; - double coef_mat[nucl_num][shell_max][prim_max]; + double coef_mat[nucl_num][shell_max][prim_max] __attribute__((aligned(64))); for (int i=0 ; i Date: Wed, 29 Jun 2022 13:59:09 +0200 Subject: [PATCH 092/100] Memory alignment in qmckl_malloc --- org/qmckl_ao.org | 6 +++--- org/qmckl_memory.org | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 2f02ce8..2e262b7 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -6536,7 +6536,7 @@ qmckl_compute_ao_vgl_hpc_gaussian ( /* --- */ - switch (512) { + switch (8) { case(5): for (int i=0 ; i> 6) << 6 ); +#else void * pointer = malloc(info.size); +#endif if (pointer == NULL) { return NULL; } From 1b846de413176004ddc05649893467d6b0795a70 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 7 Jul 2022 18:25:49 +0200 Subject: [PATCH 093/100] Check in malloc --- org/qmckl_blas.org | 151 ++++++++++++++++++++++++------------------- org/qmckl_point.org | 2 +- org/qmckl_trexio.org | 2 +- 3 files changed, 88 insertions(+), 67 deletions(-) diff --git a/org/qmckl_blas.org b/org/qmckl_blas.org index 1cf76e4..c21adee 100644 --- a/org/qmckl_blas.org +++ b/org/qmckl_blas.org @@ -72,11 +72,11 @@ whatever data structures they prefer. These data types are expected to be used internally in QMCkl. They are not intended to be passed to external codes. - + * Data types ** Vector - + | Variable | Type | Description | |----------+-----------+-------------------------| | ~size~ | ~int64_t~ | Dimension of the vector | @@ -92,7 +92,7 @@ typedef struct qmckl_vector { #+begin_src c :comments org :tangle (eval h_private_func) qmckl_vector -qmckl_vector_alloc( qmckl_context context, +qmckl_vector_alloc( qmckl_context context, const int64_t size); #+end_src @@ -100,12 +100,12 @@ qmckl_vector_alloc( qmckl_context context, #+begin_src c :comments org :tangle (eval c) :exports none qmckl_vector -qmckl_vector_alloc( qmckl_context context, +qmckl_vector_alloc( qmckl_context context, const int64_t size) { /* Should always be true by contruction */ assert (size > (int64_t) 0); - + qmckl_vector result; result.size = size; @@ -120,23 +120,30 @@ qmckl_vector_alloc( qmckl_context context, return result; } #+end_src - + #+begin_src c :comments org :tangle (eval h_private_func) qmckl_exit_code -qmckl_vector_free( qmckl_context context, +qmckl_vector_free( qmckl_context context, qmckl_vector* vector); #+end_src #+begin_src c :comments org :tangle (eval c) :exports none qmckl_exit_code -qmckl_vector_free( qmckl_context context, +qmckl_vector_free( qmckl_context context, qmckl_vector* vector) { + if (vector == NULL) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_vector_free", + "Null pointer"); + } + /* Always true */ assert (vector->data != NULL); - + qmckl_exit_code rc; - + rc = qmckl_free(context, vector->data); if (rc != QMCKL_SUCCESS) { return rc; @@ -149,7 +156,7 @@ qmckl_vector_free( qmckl_context context, #+end_src ** Matrix - + | Variable | Type | Description | |----------+--------------+-----------------------------| | ~size~ | ~int64_t[2]~ | Dimension of each component | @@ -157,7 +164,7 @@ qmckl_vector_free( qmckl_context context, The dimensions use Fortran ordering: two elements differing by one in the first dimension are consecutive in memory. - + #+begin_src c :comments org :tangle (eval h_private_type) :exports none typedef struct qmckl_matrix { double* restrict data; @@ -168,7 +175,7 @@ typedef struct qmckl_matrix { #+begin_src c :comments org :tangle (eval h_private_func) qmckl_matrix -qmckl_matrix_alloc( qmckl_context context, +qmckl_matrix_alloc( qmckl_context context, const int64_t size1, const int64_t size2); #+end_src @@ -177,13 +184,13 @@ qmckl_matrix_alloc( qmckl_context context, #+begin_src c :comments org :tangle (eval c) :exports none qmckl_matrix -qmckl_matrix_alloc( qmckl_context context, +qmckl_matrix_alloc( qmckl_context context, const int64_t size1, const int64_t size2) { /* Should always be true by contruction */ assert (size1 * size2 > (int64_t) 0); - + qmckl_matrix result; result.size[0] = size1; @@ -201,23 +208,30 @@ qmckl_matrix_alloc( qmckl_context context, return result; } #+end_src - + #+begin_src c :comments org :tangle (eval h_private_func) qmckl_exit_code -qmckl_matrix_free( qmckl_context context, +qmckl_matrix_free( qmckl_context context, qmckl_matrix* matrix); #+end_src #+begin_src c :comments org :tangle (eval c) :exports none qmckl_exit_code -qmckl_matrix_free( qmckl_context context, +qmckl_matrix_free( qmckl_context context, qmckl_matrix* matrix) { + if (matrix == NULL) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_matrix_free", + "Null pointer"); + } + /* Always true */ assert (matrix->data != NULL); - + qmckl_exit_code rc; - + rc = qmckl_free(context, matrix->data); if (rc != QMCKL_SUCCESS) { return rc; @@ -231,7 +245,7 @@ qmckl_matrix_free( qmckl_context context, #+end_src ** Tensor - + | Variable | Type | Description | |----------+-----------------------------------+-----------------------------| | ~order~ | ~int64_t~ | Order of the tensor | @@ -240,7 +254,7 @@ qmckl_matrix_free( qmckl_context context, The dimensions use Fortran ordering: two elements differing by one in the first dimension are consecutive in memory. - + #+begin_src c :comments org :tangle (eval h_private_type) :exports none #define QMCKL_TENSOR_ORDER_MAX 16 @@ -254,7 +268,7 @@ typedef struct qmckl_tensor { #+begin_src c :comments org :tangle (eval h_private_func) qmckl_tensor -qmckl_tensor_alloc( qmckl_context context, +qmckl_tensor_alloc( qmckl_context context, const int64_t order, const int64_t* size); #+end_src @@ -264,7 +278,7 @@ qmckl_tensor_alloc( qmckl_context context, #+begin_src c :comments org :tangle (eval c) :exports none qmckl_tensor -qmckl_tensor_alloc( qmckl_context context, +qmckl_tensor_alloc( qmckl_context context, const int64_t order, const int64_t* size) { @@ -272,7 +286,7 @@ qmckl_tensor_alloc( qmckl_context context, assert (order > 0); assert (order <= QMCKL_TENSOR_ORDER_MAX); assert (size != NULL); - + qmckl_tensor result; result.order = order; @@ -295,28 +309,35 @@ qmckl_tensor_alloc( qmckl_context context, return result; } #+end_src - + #+begin_src c :comments org :tangle (eval h_private_func) qmckl_exit_code -qmckl_tensor_free (qmckl_context context, +qmckl_tensor_free (qmckl_context context, qmckl_tensor* tensor); #+end_src #+begin_src c :comments org :tangle (eval c) :exports none qmckl_exit_code -qmckl_tensor_free( qmckl_context context, +qmckl_tensor_free( qmckl_context context, qmckl_tensor* tensor) { + if (tensor == NULL) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_tensor_free", + "Null pointer"); + } + /* Always true */ assert (tensor->data != NULL); - + qmckl_exit_code rc; - + rc = qmckl_free(context, tensor->data); if (rc != QMCKL_SUCCESS) { return rc; } - + memset(tensor, 0, sizeof(qmckl_tensor)); return QMCKL_SUCCESS; @@ -326,7 +347,7 @@ qmckl_tensor_free( qmckl_context context, ** Reshaping Reshaping occurs in-place and the pointer to the data is copied. - + *** Vector -> Matrix #+begin_src c :comments org :tangle (eval h_private_func) @@ -343,7 +364,7 @@ qmckl_matrix qmckl_matrix_of_vector(const qmckl_vector vector, const int64_t size1, const int64_t size2) -{ +{ /* Always true */ assert (size1 * size2 == vector.size); @@ -373,7 +394,7 @@ qmckl_tensor qmckl_tensor_of_vector(const qmckl_vector vector, const int64_t order, const int64_t* size) -{ +{ qmckl_tensor result; int64_t prod_size = 1; @@ -401,7 +422,7 @@ qmckl_vector_of_matrix(const qmckl_matrix matrix); #+begin_src c :comments org :tangle (eval c) :exports none qmckl_vector qmckl_vector_of_matrix(const qmckl_matrix matrix) -{ +{ qmckl_vector result; result.size = matrix.size[0] * matrix.size[1]; @@ -427,7 +448,7 @@ qmckl_tensor qmckl_tensor_of_matrix(const qmckl_matrix matrix, const int64_t order, const int64_t* size) -{ +{ qmckl_tensor result; int64_t prod_size = 1; @@ -455,7 +476,7 @@ qmckl_vector_of_tensor(const qmckl_tensor tensor); #+begin_src c :comments org :tangle (eval c) :exports none qmckl_vector qmckl_vector_of_tensor(const qmckl_tensor tensor) -{ +{ int64_t prod_size = (int64_t) tensor.size[0]; for (int64_t i=1 ; idata, C->size[0]); break; case 1: - if (A.size[0] != B.size[0]) { + if (A.size[0] != B.size[0]) { return qmckl_failwith( context, QMCKL_INVALID_ARG_2, "qmckl_matmul", @@ -1243,7 +1264,7 @@ qmckl_matmul (const qmckl_context context, #+begin_src python :exports none :results output import numpy as np -A = np.array([[ 1., 2., 3., 4. ], +A = np.array([[ 1., 2., 3., 4. ], [ 5., 6., 7., 8. ], [ 9., 10., 11., 12. ]]) @@ -1282,7 +1303,7 @@ print(C.T) 2., 6., 10., 3., 7., 11., 4., 8., 12. }; - + double b[20] = { 1., 5., 9., 10., -2., -6., 10., 11., 3., 7., 11., 12., @@ -1317,7 +1338,7 @@ print(C.T) printf("%f %f\n", cnew[i], c[i]); assert (c[i] == cnew[i]); } -} +} #+end_src ** ~qmckl_adjugate~ @@ -1424,7 +1445,7 @@ integer function qmckl_adjugate_f(context, na, A, LDA, B, ldb, det_l) & end function qmckl_adjugate_f #+end_src - + #+begin_src f90 :tangle (eval f) :exports none subroutine adjugate2(A,LDA,B,LDB,na,det_l) implicit none @@ -2213,12 +2234,12 @@ assert(QMCKL_SUCCESS == test_qmckl_adjugate(context)); | ~context~ | ~qmckl_context~ | in | Global state | | ~A~ | ~qmckl_matrix~ | in | Input matrix | | ~At~ | ~qmckl_matrix~ | out | Transposed matrix | - + #+begin_src c :tangle (eval h_private_func) :comments org qmckl_exit_code qmckl_transpose (qmckl_context context, const qmckl_matrix A, - qmckl_matrix At ); + qmckl_matrix At ); #+end_src @@ -2253,10 +2274,10 @@ qmckl_transpose (qmckl_context context, "Invalid size for At"); } - for (int64_t j=0 ; jpoint.num < num) { + if (ctx->point.num != num) { if (ctx->point.coord.data != NULL) { rc = qmckl_matrix_free(context, &(ctx->point.coord)); diff --git a/org/qmckl_trexio.org b/org/qmckl_trexio.org index 12b7153..4194215 100644 --- a/org/qmckl_trexio.org +++ b/org/qmckl_trexio.org @@ -1086,7 +1086,7 @@ qmckl_trexio_read(const qmckl_context context, const char* file_name, const int6 qmckl_exit_code rc; char file_name_new[size_max+1]; - strncpy(file_name_new, file_name, size_max+1); + strncpy(file_name_new, file_name, size_max); file_name_new[size_max] = '\0'; #ifdef HAVE_TREXIO From 06e6221d336832abe2eabcdd6a23cd49952f98e3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Jul 2022 09:15:17 +0200 Subject: [PATCH 094/100] Avoid duplicate storage of constant parameters in AO --- org/qmckl_ao.org | 274 +++++++++++++++++++++++-------------------- org/qmckl_error.org | 12 +- org/qmckl_trexio.org | 7 -- 3 files changed, 154 insertions(+), 139 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 2e262b7..5fa94d5 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -360,12 +360,19 @@ qmckl_exit_code qmckl_init_ao_basis(qmckl_context context) { #+begin_src c :exports none if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return qmckl_failwith( context, - QMCKL_INVALID_CONTEXT, - "qmckl_get_ao_basis_*", + QMCKL_NULL_CONTEXT, + "qmckl_get_ao_*", NULL); } qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + +if (!(ctx->ao_basis.uninitialized & mask)) { + return qmckl_failwith( context, + QMCKL_ALREADY_SET, + "qmckl_get_ao_*", + NULL); + } #+end_src #+NAME:post2 @@ -394,6 +401,8 @@ qmckl_exit_code qmckl_set_ao_basis_type(qmckl_context context, const char basis_type) { + int32_t mask = 1; + <> if (basis_type != 'G' && basis_type != 'S') { @@ -403,7 +412,6 @@ qmckl_set_ao_basis_type(qmckl_context context, NULL); } - int32_t mask = 1; ctx->ao_basis.type = basis_type; <> @@ -422,6 +430,8 @@ qmckl_exit_code qmckl_set_ao_basis_shell_num (qmckl_context context, const int64_t shell_num) { + int32_t mask = 1 << 1; + <> if (shell_num <= 0) { @@ -440,7 +450,6 @@ qmckl_set_ao_basis_shell_num (qmckl_context context, "shell_num > prim_num"); } - int32_t mask = 1 << 1; ctx->ao_basis.shell_num = shell_num; <> @@ -459,6 +468,8 @@ qmckl_exit_code qmckl_set_ao_basis_prim_num (qmckl_context context, const int64_t prim_num) { + int32_t mask = 1 << 2; + <> if (prim_num <= 0) { @@ -484,7 +495,6 @@ qmckl_set_ao_basis_prim_num (qmckl_context context, "prim_num < shell_num"); } - int32_t mask = 1 << 2; ctx->ao_basis.prim_num = prim_num; <> @@ -492,111 +502,6 @@ qmckl_set_ao_basis_prim_num (qmckl_context context, #+end_src - #+begin_src c :comments org :tangle (eval h_func) -qmckl_exit_code -qmckl_set_ao_basis_ao_num (qmckl_context context, - const int64_t ao_num); - #+end_src - - #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code -qmckl_set_ao_basis_ao_num (qmckl_context context, - const int64_t ao_num) -{ - <> - - if (ao_num <= 0) { - return qmckl_failwith( context, - QMCKL_INVALID_ARG_2, - "qmckl_set_ao_basis_shell_num", - "ao_num must be positive"); - } - - const int64_t shell_num = ctx->ao_basis.shell_num; - if (shell_num <= 0L) { - return qmckl_failwith( context, - QMCKL_INVALID_ARG_2, - "qmckl_set_ao_basis_shell_num", - "shell_num is not set"); - } - - if (ao_num < shell_num) { - return qmckl_failwith( context, - QMCKL_INVALID_ARG_2, - "qmckl_set_ao_basis_shell_num", - "ao_num < shell_num"); - } - - int32_t mask = 1 << 12; - ctx->ao_basis.ao_num = ao_num; - - <> -} - #+end_src - - - #+begin_src c :comments org :tangle (eval h_func) -qmckl_exit_code -qmckl_set_ao_basis_nucleus_index (qmckl_context context, - const int64_t* nucleus_index, - const int64_t size_max); - #+end_src - - #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none -qmckl_exit_code -qmckl_set_ao_basis_nucleus_index (qmckl_context context, - const int64_t* nucleus_index, - const int64_t size_max) -{ - <> - - int32_t mask = 1 << 4; - - const int64_t nucl_num = ctx->nucleus.num; - - if (nucl_num <= 0L) { - return qmckl_failwith( context, - QMCKL_FAILURE, - "qmckl_set_ao_basis_nucleus_index", - "nucl_num is not set"); - } - - if (size_max < nucl_num) { - return qmckl_failwith( context, - QMCKL_FAILURE, - "qmckl_set_ao_basis_nucleus_index", - "input array too small"); - } - - if (ctx->ao_basis.nucleus_index != NULL) { - qmckl_exit_code rc = qmckl_free(context, ctx->ao_basis.nucleus_index); - if (rc != QMCKL_SUCCESS) { - return qmckl_failwith( context, rc, - "qmckl_set_ao_basis_nucleus_index", - NULL); - } - } - - qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; - mem_info.size = nucl_num * sizeof(int64_t); - int64_t* new_array = (int64_t*) qmckl_malloc(context, mem_info); - - if (new_array == NULL) { - return qmckl_failwith( context, - QMCKL_ALLOCATION_FAILED, - "qmckl_set_ao_basis_nucleus_index", - NULL); - } - - memcpy(new_array, nucleus_index, mem_info.size); - - ctx->ao_basis.nucleus_index = new_array; - - <> -} - #+end_src - - #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_nucleus_shell_num (qmckl_context context, @@ -610,10 +515,10 @@ qmckl_set_ao_basis_nucleus_shell_num (qmckl_context context, const int64_t* nucleus_shell_num, const int64_t size_max) { - <> - int32_t mask = 1 << 3; + <> + const int64_t nucl_num = ctx->nucleus.num; if (nucl_num <= 0L) { @@ -659,6 +564,68 @@ qmckl_set_ao_basis_nucleus_shell_num (qmckl_context context, #+end_src + #+begin_src c :comments org :tangle (eval h_func) +qmckl_exit_code +qmckl_set_ao_basis_nucleus_index (qmckl_context context, + const int64_t* nucleus_index, + const int64_t size_max); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_set_ao_basis_nucleus_index (qmckl_context context, + const int64_t* nucleus_index, + const int64_t size_max) +{ + int32_t mask = 1 << 4; + + <> + + const int64_t nucl_num = ctx->nucleus.num; + + if (nucl_num <= 0L) { + return qmckl_failwith( context, + QMCKL_FAILURE, + "qmckl_set_ao_basis_nucleus_index", + "nucl_num is not set"); + } + + if (size_max < nucl_num) { + return qmckl_failwith( context, + QMCKL_FAILURE, + "qmckl_set_ao_basis_nucleus_index", + "input array too small"); + } + + if (ctx->ao_basis.nucleus_index != NULL) { + qmckl_exit_code rc = qmckl_free(context, ctx->ao_basis.nucleus_index); + if (rc != QMCKL_SUCCESS) { + return qmckl_failwith( context, rc, + "qmckl_set_ao_basis_nucleus_index", + NULL); + } + } + + qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero; + mem_info.size = nucl_num * sizeof(int64_t); + int64_t* new_array = (int64_t*) qmckl_malloc(context, mem_info); + + if (new_array == NULL) { + return qmckl_failwith( context, + QMCKL_ALLOCATION_FAILED, + "qmckl_set_ao_basis_nucleus_index", + NULL); + } + + memcpy(new_array, nucleus_index, mem_info.size); + + ctx->ao_basis.nucleus_index = new_array; + + <> +} + #+end_src + + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_shell_ang_mom (qmckl_context context, @@ -672,10 +639,10 @@ qmckl_set_ao_basis_shell_ang_mom (qmckl_context context, const int32_t* shell_ang_mom, const int64_t size_max) { - <> - int32_t mask = 1 << 5; + <> + const int64_t shell_num = ctx->ao_basis.shell_num; if (shell_num == 0L) { @@ -735,10 +702,10 @@ qmckl_set_ao_basis_shell_prim_num (qmckl_context context, const int64_t* shell_prim_num, const int64_t size_max) { - <> - int32_t mask = 1 << 6; + <> + const int64_t shell_num = ctx->ao_basis.shell_num; if (shell_num <= 0L) { @@ -798,10 +765,10 @@ qmckl_set_ao_basis_shell_prim_index (qmckl_context context, const int64_t* shell_prim_index, const int64_t size_max) { - <> - int32_t mask = 1 << 7; + <> + const int64_t shell_num = ctx->ao_basis.shell_num; if (shell_num <= 0L) { @@ -860,10 +827,10 @@ qmckl_set_ao_basis_shell_factor (qmckl_context context, const double* shell_factor, const int64_t size_max) { - <> - int32_t mask = 1 << 8; + <> + const int64_t shell_num = ctx->ao_basis.shell_num; if (shell_num <= 0L) { @@ -922,10 +889,10 @@ qmckl_set_ao_basis_exponent (qmckl_context context, const double* exponent, const int64_t size_max) { - <> - int32_t mask = 1 << 9; + <> + const int64_t prim_num = ctx->ao_basis.prim_num; if (prim_num <= 0L) { @@ -984,10 +951,10 @@ qmckl_set_ao_basis_coefficient (qmckl_context context, const double* coefficient, const int64_t size_max) { - <> - int32_t mask = 1 << 10; + <> + const int64_t prim_num = ctx->ao_basis.prim_num; if (prim_num <= 0L) { @@ -1046,10 +1013,10 @@ qmckl_set_ao_basis_prim_factor (qmckl_context context, const double* prim_factor, const int64_t size_max) { - <> - int32_t mask = 1 << 11; + <> + const int64_t prim_num = ctx->ao_basis.prim_num; if (prim_num <= 0L) { @@ -1095,6 +1062,51 @@ qmckl_set_ao_basis_prim_factor (qmckl_context context, #+end_src + #+begin_src c :comments org :tangle (eval h_func) +qmckl_exit_code +qmckl_set_ao_basis_ao_num (qmckl_context context, + const int64_t ao_num); + #+end_src + + #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none +qmckl_exit_code +qmckl_set_ao_basis_ao_num (qmckl_context context, + const int64_t ao_num) +{ + int32_t mask = 1 << 12; + + <> + + if (ao_num <= 0) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_set_ao_basis_shell_num", + "ao_num must be positive"); + } + + const int64_t shell_num = ctx->ao_basis.shell_num; + if (shell_num <= 0L) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_set_ao_basis_shell_num", + "shell_num is not set"); + } + + if (ao_num < shell_num) { + return qmckl_failwith( context, + QMCKL_INVALID_ARG_2, + "qmckl_set_ao_basis_shell_num", + "ao_num < shell_num"); + } + + ctx->ao_basis.ao_num = ao_num; + + <> +} + #+end_src + + + #+begin_src c :comments org :tangle (eval h_func) qmckl_exit_code qmckl_set_ao_basis_ao_factor (qmckl_context context, @@ -1108,10 +1120,10 @@ qmckl_set_ao_basis_ao_factor (qmckl_context context, const double* ao_factor, const int64_t size_max) { - <> - int32_t mask = 1 << 13; + <> + const int64_t ao_num = ctx->ao_basis.ao_num; if (ao_num <= 0L) { @@ -1168,9 +1180,10 @@ qmckl_exit_code qmckl_set_ao_basis_cartesian (qmckl_context context, const bool cartesian) { + int32_t mask = 1; + <> - int32_t mask = 1; ctx->ao_basis.ao_cartesian = cartesian; <> @@ -2325,6 +2338,9 @@ rc = qmckl_set_ao_basis_nucleus_index (context, nucleus_index, nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); +rc = qmckl_set_ao_basis_nucleus_index (context, nucleus_index, nucl_num); +assert(rc == QMCKL_ALREADY_SET); + rc = qmckl_set_ao_basis_nucleus_shell_num (context, nucleus_shell_num, nucl_num); assert(rc == QMCKL_SUCCESS); assert(!qmckl_ao_basis_provided(context)); diff --git a/org/qmckl_error.org b/org/qmckl_error.org index d23fa8e..25721d1 100644 --- a/org/qmckl_error.org +++ b/org/qmckl_error.org @@ -105,7 +105,8 @@ typedef int32_t qmckl_exit_code; | ~QMCKL_DEALLOCATION_FAILED~ | 105 | 'De-allocation failed' | | ~QMCKL_NOT_PROVIDED~ | 106 | 'Not provided' | | ~QMCKL_OUT_OF_BOUNDS~ | 107 | 'Index out of bounds' | - | ~QMCKL_INVALID_EXIT_CODE~ | 108 | 'Invalid exit code' | + | ~QMCKL_ALREADY_SET~ | 108 | 'Already set' | + | ~QMCKL_INVALID_EXIT_CODE~ | 109 | 'Invalid exit code' | # We need to force Emacs not to indent the Python code: # -*- org-src-preserve-indentation: t @@ -164,7 +165,8 @@ return '\n'.join(result) #define QMCKL_DEALLOCATION_FAILED ((qmckl_exit_code) 105) #define QMCKL_NOT_PROVIDED ((qmckl_exit_code) 106) #define QMCKL_OUT_OF_BOUNDS ((qmckl_exit_code) 107) - #define QMCKL_INVALID_EXIT_CODE ((qmckl_exit_code) 108) + #define QMCKL_ALREADY_SET ((qmckl_exit_code) 108) + #define QMCKL_INVALID_EXIT_CODE ((qmckl_exit_code) 109) #+end_src #+begin_src f90 :comments org :tangle (eval fh_type) :exports none @@ -196,7 +198,8 @@ return '\n'.join(result) integer(qmckl_exit_code), parameter :: QMCKL_DEALLOCATION_FAILED = 105 integer(qmckl_exit_code), parameter :: QMCKL_NOT_PROVIDED = 106 integer(qmckl_exit_code), parameter :: QMCKL_OUT_OF_BOUNDS = 107 - integer(qmckl_exit_code), parameter :: QMCKL_INVALID_EXIT_CODE = 108 + integer(qmckl_exit_code), parameter :: QMCKL_ALREADY_SET = 108 + integer(qmckl_exit_code), parameter :: QMCKL_INVALID_EXIT_CODE = 109 #+end_src :end: @@ -330,6 +333,9 @@ return '\n'.join(result) case QMCKL_OUT_OF_BOUNDS: return "Index out of bounds"; + case QMCKL_ALREADY_SET: + return "Already set"; + case QMCKL_INVALID_EXIT_CODE: return "Invalid exit code"; #+end_example diff --git a/org/qmckl_trexio.org b/org/qmckl_trexio.org index 4194215..c664f3a 100644 --- a/org/qmckl_trexio.org +++ b/org/qmckl_trexio.org @@ -428,13 +428,6 @@ qmckl_trexio_read_ao_X(qmckl_context context, trexio_t* const file) } /* Reformat data */ - rc = qmckl_set_ao_basis_nucleus_index(context, nucleus_index, nucleus_num); - if (rc != QMCKL_SUCCESS) { - qmckl_free(context, nucleus_index); - nucleus_index = NULL; - return rc; - } - for (int i=shell_num-1 ; i>=0 ; --i) { const int k = tmp_array[i]; if (k < 0 || k >= nucleus_num) { From 6ba83ee33f64357dea13670dd232fc7d23eba7e1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 8 Jul 2022 09:24:33 +0200 Subject: [PATCH 095/100] Avoid duplicate storage of constant parameters in nucleus and electron --- org/qmckl_ao.org | 36 +++--------------------------------- org/qmckl_electron.org | 41 +++++++++++++++++++---------------------- org/qmckl_nucleus.org | 28 ++++++++++++++++++++-------- 3 files changed, 42 insertions(+), 63 deletions(-) diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org index 5fa94d5..f50dd1c 100644 --- a/org/qmckl_ao.org +++ b/org/qmckl_ao.org @@ -361,16 +361,16 @@ qmckl_exit_code qmckl_init_ao_basis(qmckl_context context) { if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { return qmckl_failwith( context, QMCKL_NULL_CONTEXT, - "qmckl_get_ao_*", + "qmckl_set_ao_*", NULL); - } +} qmckl_context_struct* const ctx = (qmckl_context_struct*) context; if (!(ctx->ao_basis.uninitialized & mask)) { return qmckl_failwith( context, QMCKL_ALREADY_SET, - "qmckl_get_ao_*", + "qmckl_set_ao_*", NULL); } #+end_src @@ -4168,20 +4168,10 @@ print ( "[1][4][26] : %25.15e"% lf(a,x,y)) #+begin_src c :tangle (eval c_test) :exports none { -#define walk_num 1 // chbrclf_walk_num -#define elec_num chbrclf_elec_num #define shell_num chbrclf_shell_num - int64_t elec_up_num = chbrclf_elec_up_num; - int64_t elec_dn_num = chbrclf_elec_dn_num; double* elec_coord = &(chbrclf_elec_coord[0][0][0]); - rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num); - assert (rc == QMCKL_SUCCESS); - - rc = qmckl_set_electron_walk_num (context, walk_num); - assert (rc == QMCKL_SUCCESS); - assert(qmckl_electron_provided(context)); rc = qmckl_set_electron_coord (context, 'N', elec_coord, walk_num*elec_num*3); @@ -6141,21 +6131,11 @@ print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) #+begin_src c :tangle (eval c_test) :exports none { -#define walk_num 1 // chbrclf_walk_num -#define elec_num chbrclf_elec_num #define shell_num chbrclf_shell_num #define ao_num chbrclf_ao_num -int64_t elec_up_num = chbrclf_elec_up_num; -int64_t elec_dn_num = chbrclf_elec_dn_num; double* elec_coord = &(chbrclf_elec_coord[0][0][0]); -rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num); -assert (rc == QMCKL_SUCCESS); - -rc = qmckl_set_electron_walk_num (context, walk_num); -assert (rc == QMCKL_SUCCESS); - assert(qmckl_electron_provided(context)); rc = qmckl_set_electron_coord (context, 'N', elec_coord, walk_num*elec_num*3); @@ -7137,21 +7117,11 @@ print ( "[26][1][224] : %25.15e"%(df(a,x,y,1)* (x[2] - y[2]) * (x[2] - y[2])) ) #+begin_src c :tangle (eval c_test) :exports none { -#define walk_num 1 // chbrclf_walk_num -#define elec_num chbrclf_elec_num #define shell_num chbrclf_shell_num #define ao_num chbrclf_ao_num -int64_t elec_up_num = chbrclf_elec_up_num; -int64_t elec_dn_num = chbrclf_elec_dn_num; double* elec_coord = &(chbrclf_elec_coord[0][0][0]); -rc = qmckl_set_electron_num (context, elec_up_num, elec_dn_num); -assert (rc == QMCKL_SUCCESS); - -rc = qmckl_set_electron_walk_num (context, walk_num); -assert (rc == QMCKL_SUCCESS); - assert(qmckl_electron_provided(context)); rc = qmckl_set_electron_coord (context, 'N', elec_coord, walk_num*elec_num*3); diff --git a/org/qmckl_electron.org b/org/qmckl_electron.org index a4736cc..3b739eb 100644 --- a/org/qmckl_electron.org +++ b/org/qmckl_electron.org @@ -490,6 +490,13 @@ if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { } qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + +if (mask != 0 && !(ctx->electron.uninitialized & mask)) { + return qmckl_failwith( context, + QMCKL_ALREADY_SET, + "qmckl_set_electron_*", + NULL); +} #+end_src #+NAME:post2 @@ -544,6 +551,8 @@ qmckl_exit_code qmckl_set_electron_num(qmckl_context context, const int64_t up_num, const int64_t down_num) { + int32_t mask = 1 << 0; + <> if (up_num <= 0) { @@ -560,8 +569,6 @@ qmckl_set_electron_num(qmckl_context context, "down_num < 0"); } - int32_t mask = 1 << 0; - ctx->electron.up_num = up_num; ctx->electron.down_num = down_num; ctx->electron.num = up_num + down_num; @@ -576,6 +583,8 @@ qmckl_set_electron_num(qmckl_context context, qmckl_exit_code qmckl_set_electron_walk_num(qmckl_context context, const int64_t walk_num) { + int32_t mask = 1 << 1; + <> if (walk_num <= 0) { @@ -585,7 +594,6 @@ qmckl_set_electron_walk_num(qmckl_context context, const int64_t walk_num) { "walk_num <= 0"); } - int32_t mask = 1 << 1; ctx->electron.walk_num = walk_num; <> @@ -598,6 +606,9 @@ qmckl_set_electron_walk_num(qmckl_context context, const int64_t walk_num) { qmckl_exit_code qmckl_set_electron_rescale_factor_ee(qmckl_context context, const double rescale_factor_kappa_ee) { + + int32_t mask = 0; // can be changed + <> if (rescale_factor_kappa_ee <= 0.0) { @@ -615,6 +626,9 @@ qmckl_set_electron_rescale_factor_ee(qmckl_context context, qmckl_exit_code qmckl_set_electron_rescale_factor_en(qmckl_context context, const double rescale_factor_kappa_en) { + + int32_t mask = 0; // can be changed + <> if (rescale_factor_kappa_en <= 0.0) { @@ -675,6 +689,8 @@ qmckl_set_electron_coord(qmckl_context context, const int64_t size_max) { + int32_t mask = 0; // coord can be changed + <> if (transp != 'N' && transp != 'T') { @@ -2318,16 +2334,6 @@ print ( "[1][0][1] : ", (1.0 - np.exp(-kappa * np.linalg.norm(elec_2_w2-nucl_1)) #+begin_src c :tangle (eval c_test) assert(qmckl_electron_provided(context)); - -rc = qmckl_set_nucleus_num (context, nucl_num); -assert(rc == QMCKL_SUCCESS); - -rc = qmckl_set_nucleus_charge (context, charge, nucl_num); -assert (rc == QMCKL_SUCCESS); - -rc = qmckl_set_nucleus_coord (context, 'T', nucl_coord, 3*nucl_num); -assert (rc == QMCKL_SUCCESS); - assert(qmckl_nucleus_provided(context)); double en_distance_rescaled[walk_num][nucl_num][elec_num]; @@ -2586,18 +2592,9 @@ import numpy as np assert(qmckl_electron_provided(context)); -rc = qmckl_set_nucleus_num (context, nucl_num); -assert(rc == QMCKL_SUCCESS); - rc = qmckl_set_nucleus_rescale_factor (context, nucl_rescale_factor_kappa); assert(rc == QMCKL_SUCCESS); -rc = qmckl_set_nucleus_charge (context, charge, nucl_num); -assert (rc == QMCKL_SUCCESS); - -rc = qmckl_set_nucleus_coord (context, 'T', nucl_coord, 3*nucl_num); -assert (rc == QMCKL_SUCCESS); - assert(qmckl_nucleus_provided(context)); double en_distance_rescaled_deriv_e[walk_num][4][nucl_num][elec_num]; diff --git a/org/qmckl_nucleus.org b/org/qmckl_nucleus.org index 05907ce..f1d89d3 100644 --- a/org/qmckl_nucleus.org +++ b/org/qmckl_nucleus.org @@ -422,10 +422,20 @@ bool qmckl_nucleus_provided(const qmckl_context context) { #+NAME:pre2 #+begin_src c :exports none if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { - return QMCKL_NULL_CONTEXT; - } + return qmckl_failwith( context, + QMCKL_NULL_CONTEXT, + "qmckl_set_nucleus_*", + NULL); +} qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + +if (mask != 0 && !(ctx->nucleus.uninitialized & mask)) { + return qmckl_failwith( context, + QMCKL_ALREADY_SET, + "qmckl_set_nucleus_*", + NULL); +} #+end_src #+NAME:post2 @@ -452,6 +462,8 @@ qmckl_exit_code qmckl_set_nucleus_num(qmckl_context context, const int64_t num) { + int32_t mask = 1 << 0; + <> if (num <= 0) { @@ -461,8 +473,6 @@ qmckl_set_nucleus_num(qmckl_context context, "num <= 0"); } - int32_t mask = 1 << 0; - ctx->nucleus.num = num; <> @@ -498,6 +508,8 @@ qmckl_set_nucleus_charge(qmckl_context context, const double* charge, const int64_t size_max) { + int32_t mask = 1 << 1; + <> if (charge == NULL) { @@ -510,8 +522,6 @@ qmckl_set_nucleus_charge(qmckl_context context, int64_t num; qmckl_exit_code rc; - int32_t mask = 1 << 1; - rc = qmckl_get_nucleus_num(context, &num); if (rc != QMCKL_SUCCESS) return rc; @@ -569,12 +579,12 @@ qmckl_set_nucleus_coord(qmckl_context context, const double* coord, const int64_t size_max) { + int32_t mask = 1 << 2; + <> qmckl_exit_code rc; - int32_t mask = 1 << 2; - const int64_t nucl_num = (int64_t) ctx->nucleus.num; if (ctx->nucleus.coord.data != NULL) { @@ -641,6 +651,8 @@ qmckl_exit_code qmckl_set_nucleus_rescale_factor(qmckl_context context, const double rescale_factor_kappa) { + int32_t mask = 0; // Can be updated + <> if (rescale_factor_kappa <= 0.0) { From 21a9d6c51e81fdbbb5eaf29625f73a2ae1fe10ac Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 9 Jul 2022 11:17:52 +0200 Subject: [PATCH 096/100] mask in MOs --- org/qmckl_mo.org | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org index 3e21623..68e6305 100644 --- a/org/qmckl_mo.org +++ b/org/qmckl_mo.org @@ -305,6 +305,13 @@ if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) { } qmckl_context_struct* const ctx = (qmckl_context_struct*) context; + +if (!(ctx->mo_basis.uninitialized & mask)) { + return qmckl_failwith( context, + QMCKL_ALREADY_SET, + "qmckl_set_mo_*", + NULL); + } #+end_src #+NAME:post @@ -320,6 +327,9 @@ return QMCKL_SUCCESS; #+begin_src c :comments org :tangle (eval c) :noweb yes :exports none qmckl_exit_code qmckl_set_mo_basis_mo_num(qmckl_context context, const int64_t mo_num) { + + int32_t mask = 1 ; + <
>
 
   if (mo_num <= 0) {
@@ -329,17 +339,17 @@ qmckl_exit_code qmckl_set_mo_basis_mo_num(qmckl_context context, const int64_t m
                            "mo_num <= 0");
   }
 
-  int32_t mask = 1 ;
   ctx->mo_basis.mo_num = mo_num;
 
   <>
 }
 
 qmckl_exit_code  qmckl_set_mo_basis_coefficient(qmckl_context context, const double* coefficient) {
-  <
>
-
+  
   int32_t mask = 1 << 1;
 
+  <
>
+
   if (ctx->mo_basis.coefficient != NULL) {
     qmckl_exit_code rc = qmckl_free(context, ctx->mo_basis.coefficient);
     if (rc != QMCKL_SUCCESS) {

From 8ee9e9dcca6c8b313ef504918ea0136372caa775 Mon Sep 17 00:00:00 2001
From: Anthony Scemama 
Date: Sat, 9 Jul 2022 11:34:38 +0200
Subject: [PATCH 097/100] check mask in jastrow

---
 org/qmckl_ao.org      |  2 +-
 org/qmckl_jastrow.org | 34 +++++++++++++++++++++++-----------
 org/qmckl_mo.org      |  2 +-
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org
index f50dd1c..e61dfa4 100644
--- a/org/qmckl_ao.org
+++ b/org/qmckl_ao.org
@@ -367,7 +367,7 @@ if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
 
 qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
 
-if (!(ctx->ao_basis.uninitialized & mask)) {
+if (mask != 0 && !(ctx->ao_basis.uninitialized & mask)) {
     return qmckl_failwith( context,
                            QMCKL_ALREADY_SET,
                            "qmckl_set_ao_*",
diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org
index 5fe060a..ca94399 100644
--- a/org/qmckl_jastrow.org
+++ b/org/qmckl_jastrow.org
@@ -407,7 +407,7 @@ qmckl_exit_code qmckl_init_jastrow(qmckl_context context) {
   qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
   assert (ctx != NULL);
 
-  ctx->jastrow.uninitialized = (1 << 5) - 1;
+  ctx->jastrow.uninitialized = (1 << 6) - 1;
 
   /* Default values */
 
@@ -746,6 +746,14 @@ if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
  }
 
 qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
+
+if (mask != 0 && !(ctx->jastrow.uninitialized & mask)) {
+  printf("%d %d\n", mask, ctx->jastrow.uninitialized );
+    return qmckl_failwith( context,
+                           QMCKL_ALREADY_SET,
+                           "qmckl_set_jastrow_*",
+                           NULL);
+ }
    #+end_src
 
    #+NAME:post2
@@ -768,6 +776,9 @@ qmckl_set_jastrow_ord_num(qmckl_context context,
                           const int64_t bord_num,
                           const int64_t cord_num)
 {
+
+  int32_t mask = 1 << 0;
+
 <>
 
   if (aord_num <= 0) {
@@ -791,7 +802,6 @@ qmckl_set_jastrow_ord_num(qmckl_context context,
                            "cord_num <= 0");
   }
 
-  int32_t mask = 1 << 0;
   ctx->jastrow.aord_num = aord_num;
   ctx->jastrow.bord_num = bord_num;
   ctx->jastrow.cord_num = cord_num;
@@ -803,6 +813,8 @@ qmckl_set_jastrow_ord_num(qmckl_context context,
 qmckl_exit_code
 qmckl_set_jastrow_type_nucl_num(qmckl_context context, const int64_t type_nucl_num)
 {
+  int32_t mask = 1 << 1;
+
 <>
 
   if (type_nucl_num <= 0) {
@@ -812,7 +824,6 @@ qmckl_set_jastrow_type_nucl_num(qmckl_context context, const int64_t type_nucl_n
                            "type_nucl_num < 0");
   }
 
-  int32_t mask = 1 << 1;
   ctx->jastrow.type_nucl_num = type_nucl_num;
 
   <>
@@ -824,10 +835,11 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context,
                                    int64_t const * type_nucl_vector,
                                    const int64_t nucl_num)
 {
-<>
 
   int32_t mask = 1 << 2;
 
+<>
+
   int64_t type_nucl_num;
   qmckl_exit_code rc = qmckl_get_jastrow_type_nucl_num(context, &type_nucl_num);
   if (rc != QMCKL_SUCCESS) return rc;
@@ -879,10 +891,10 @@ qmckl_set_jastrow_aord_vector(qmckl_context context,
                               double const * aord_vector,
                               const int64_t size_max)
 {
-<>
-
   int32_t mask = 1 << 3;
 
+<>
+
   int64_t aord_num;
   qmckl_exit_code rc = qmckl_get_jastrow_aord_num(context, &aord_num);
   if (rc != QMCKL_SUCCESS) return rc;
@@ -946,10 +958,10 @@ qmckl_set_jastrow_bord_vector(qmckl_context context,
                               double const * bord_vector,
                               const int64_t size_max)
 {
-<>
-
   int32_t mask = 1 << 4;
 
+<>
+
   int64_t bord_num;
   qmckl_exit_code rc = qmckl_get_jastrow_bord_num(context, &bord_num);
   if (rc != QMCKL_SUCCESS) return rc;
@@ -1009,10 +1021,10 @@ qmckl_set_jastrow_cord_vector(qmckl_context context,
                               double const * cord_vector,
                               const int64_t size_max)
 {
-<>
-
   int32_t mask = 1 << 5;
 
+<>
+
   qmckl_exit_code rc = qmckl_provide_dim_cord_vect(context);
   if (rc != QMCKL_SUCCESS) return rc;
 
@@ -1042,7 +1054,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context,
     rc = qmckl_free(context, ctx->jastrow.cord_vector);
     if (rc != QMCKL_SUCCESS) {
       return qmckl_failwith( context, rc,
-                             "qmckl_set_ord_vector",
+                             "qmckl_set_cord_vector",
                              NULL);
     }
   }
diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org
index 68e6305..f9ae16e 100644
--- a/org/qmckl_mo.org
+++ b/org/qmckl_mo.org
@@ -306,7 +306,7 @@ if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
 
 qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
 
-if (!(ctx->mo_basis.uninitialized & mask)) {
+if (mask != 0 && !(ctx->mo_basis.uninitialized & mask)) {
     return qmckl_failwith( context,
                            QMCKL_ALREADY_SET,
                            "qmckl_set_mo_*",

From e08456bd61275735386382f8ad79eb74d45e324c Mon Sep 17 00:00:00 2001
From: Anthony Scemama 
Date: Sat, 9 Jul 2022 13:11:20 +0200
Subject: [PATCH 098/100] Introduced write_provider org functions

---
 org/qmckl_ao.org    | 150 ++++++++++++++++++++++++++++----------
 org/qmckl_mo.org    |   4 +-
 org/qmckl_point.org |  22 ++++--
 tools/lib.org       | 171 +++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 291 insertions(+), 56 deletions(-)

diff --git a/org/qmckl_ao.org b/org/qmckl_ao.org
index e61dfa4..f28ea8b 100644
--- a/org/qmckl_ao.org
+++ b/org/qmckl_ao.org
@@ -3140,7 +3140,7 @@ qmckl_get_ao_basis_ao_value (qmckl_context context,
 
   qmckl_exit_code rc;
 
-  rc = qmckl_provide_ao_value(context);
+  rc = qmckl_provide_ao_basis_ao_value(context);
   if (rc != QMCKL_SUCCESS) return rc;
 
   qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
@@ -3216,7 +3216,7 @@ qmckl_get_ao_basis_ao_value_inplace (qmckl_context context,
 
   ctx->ao_basis.ao_value = ao_value;
 
-  rc = qmckl_provide_ao_value(context);
+  rc = qmckl_provide_ao_basis_ao_value(context);
   if (rc != QMCKL_SUCCESS) return rc;
 
   ctx->ao_basis.ao_value = old_array;
@@ -3624,18 +3624,26 @@ end function qmckl_compute_ao_basis_primitive_gaussian_vgl_f
 
 *** Provide                                                        :noexport:
 
-    #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
-qmckl_exit_code qmckl_provide_ao_basis_primitive_vgl(qmckl_context context);
-    #+end_src
+#+CALL: write_provider_header( group="ao_basis", data="primitive_vgl" )
 
-    #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :export none
+qmckl_exit_code qmckl_provide_ao_basis_primitive_vgl(qmckl_context context);
+#+end_src
+
+#+CALL: write_provider_pre( group="ao_basis", data="primitive_vgl", dimension="ctx->ao_basis.prim_num * 5 * ctx->point.num")
+
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval c) :noweb yes :export none
 qmckl_exit_code qmckl_provide_ao_basis_primitive_vgl(qmckl_context context)
 {
 
+  qmckl_exit_code rc;
+
   if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
     return qmckl_failwith( context,
                            QMCKL_INVALID_CONTEXT,
-                           "qmckl_provide_get_ao_basis_primitive_vgl",
+                           "qmckl_provide_ao_basis_primitive_vgl",
                            NULL);
   }
 
@@ -3645,19 +3653,26 @@ qmckl_exit_code qmckl_provide_ao_basis_primitive_vgl(qmckl_context context)
   if (!ctx->ao_basis.provided) {
     return qmckl_failwith( context,
                            QMCKL_NOT_PROVIDED,
-                           "qmckl_ao_basis_primitive_vgl",
+                           "qmckl_provide_ao_basis_primitive_vgl",
                            NULL);
   }
 
   /* Compute if necessary */
   if (ctx->point.date > ctx->ao_basis.primitive_vgl_date) {
 
+    if (ctx->point.alloc_date > ctx->ao_basis.primitive_vgl_date) {
+      if (ctx->ao_basis.primitive_vgl != NULL) {
+        rc = qmckl_free(context, ctx->ao_basis.primitive_vgl);
+        assert (rc == QMCKL_SUCCESS);
+        ctx->ao_basis.primitive_vgl = NULL;
+      }
+    }
+
     /* Allocate array */
     if (ctx->ao_basis.primitive_vgl == NULL) {
 
       qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
-      mem_info.size = ctx->ao_basis.prim_num * 5 * ctx->point.num *
-        sizeof(double);
+      mem_info.size = ctx->ao_basis.prim_num * 5 * ctx->point.num * sizeof(double);
       double* primitive_vgl = (double*) qmckl_malloc(context, mem_info);
 
       if (primitive_vgl == NULL) {
@@ -3669,7 +3684,9 @@ qmckl_exit_code qmckl_provide_ao_basis_primitive_vgl(qmckl_context context)
       ctx->ao_basis.primitive_vgl = primitive_vgl;
     }
 
-    qmckl_exit_code rc;
+#+end_src
+
+    #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
     if (ctx->ao_basis.type == 'G') {
       rc = qmckl_compute_ao_basis_primitive_gaussian_vgl(context,
                                                          ctx->ao_basis.prim_num,
@@ -3686,16 +3703,22 @@ qmckl_exit_code qmckl_provide_ao_basis_primitive_vgl(qmckl_context context)
                              "compute_ao_basis_primitive_vgl",
                              "Not yet implemented");
     }
+    #+end_src
+
+#+CALL: write_provider_post( group="ao_basis", data="shell_vgl" )
+
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval c) :noweb yes :export none
     if (rc != QMCKL_SUCCESS) {
       return rc;
     }
 
-    ctx->ao_basis.primitive_vgl_date = ctx->date;
+    ctx->ao_basis.shell_vgl_date = ctx->date;
   }
 
   return QMCKL_SUCCESS;
 }
-    #+end_src
+#+end_src
 
 *** Test                                                           :noexport:
 
@@ -4030,14 +4053,22 @@ end function qmckl_compute_ao_basis_shell_gaussian_vgl_f
 
 *** Provide                                                        :noexport:
 
-    #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
-qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context);
-    #+end_src
+#+CALL: write_provider_header( group="ao_basis", data="shell_vgl" )
 
-    #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :export none
+qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context);
+#+end_src
+
+#+CALL: write_provider_pre( group="ao_basis", data="shell_vgl", dimension="ctx->ao_basis.shell_num * 5 * ctx->point.num")
+
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval c) :noweb yes :export none
 qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context)
 {
 
+  qmckl_exit_code rc;
+
   if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
     return qmckl_failwith( context,
                            QMCKL_INVALID_CONTEXT,
@@ -4058,6 +4089,14 @@ qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context)
   /* Compute if necessary */
   if (ctx->point.date > ctx->ao_basis.shell_vgl_date) {
 
+    if (ctx->point.alloc_date > ctx->ao_basis.shell_vgl_date) {
+      if (ctx->ao_basis.shell_vgl != NULL) {
+        rc = qmckl_free(context, ctx->ao_basis.shell_vgl);
+        assert (rc == QMCKL_SUCCESS);
+        ctx->ao_basis.shell_vgl = NULL;
+      }
+    }
+
     /* Allocate array */
     if (ctx->ao_basis.shell_vgl == NULL) {
 
@@ -4074,7 +4113,9 @@ qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context)
       ctx->ao_basis.shell_vgl = shell_vgl;
     }
 
-    qmckl_exit_code rc;
+#+end_src
+
+#+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
     if (ctx->ao_basis.type == 'G') {
       rc = qmckl_compute_ao_basis_shell_gaussian_vgl(context,
                                                      ctx->ao_basis.prim_num,
@@ -4097,6 +4138,11 @@ qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context)
                              "compute_ao_basis_shell_vgl",
                              "Not yet implemented");
     }
+#+end_src
+#+CALL: write_provider_post( group="ao_basis", data="shell_vgl" )
+
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval c) :noweb yes :export none
     if (rc != QMCKL_SUCCESS) {
       return rc;
     }
@@ -4106,7 +4152,7 @@ qmckl_exit_code qmckl_provide_ao_basis_shell_vgl(qmckl_context context)
 
   return QMCKL_SUCCESS;
 }
-    #+end_src
+#+end_src
 
 *** Test                                                           :noexport:
 
@@ -5919,18 +5965,26 @@ qmckl_compute_ao_value_hpc_gaussian (const qmckl_context context,
 
 **** Provide                                                       :noexport:
 
-     #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :exports none
-qmckl_exit_code qmckl_provide_ao_value(qmckl_context context);
-     #+end_src
+#+CALL: write_provider_header( group="ao_basis", data="ao_value" )
 
-     #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
-qmckl_exit_code qmckl_provide_ao_value(qmckl_context context)
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :export none
+qmckl_exit_code qmckl_provide_ao_basis_ao_value(qmckl_context context);
+#+end_src
+
+#+CALL: write_provider_pre( group="ao_basis", data="ao_value", dimension="ctx->ao_basis.ao_num * ctx->point.num")
+
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval c) :noweb yes :export none
+qmckl_exit_code qmckl_provide_ao_basis_ao_value(qmckl_context context)
 {
 
+  qmckl_exit_code rc;
+
   if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
     return qmckl_failwith( context,
                            QMCKL_INVALID_CONTEXT,
-                           "qmckl_provide_ao_value",
+                           "qmckl_provide_ao_basis_ao_value",
                            NULL);
   }
 
@@ -5940,28 +5994,26 @@ qmckl_exit_code qmckl_provide_ao_value(qmckl_context context)
   if (!ctx->ao_basis.provided) {
     return qmckl_failwith( context,
                            QMCKL_NOT_PROVIDED,
-                           "qmckl_ao_value",
+                           "qmckl_provide_ao_basis_ao_value",
                            NULL);
   }
 
   /* Compute if necessary */
   if (ctx->point.date > ctx->ao_basis.ao_value_date) {
 
-    qmckl_exit_code rc;
-
-    /* Provide required data */
-#ifndef HAVE_HPC
-    rc = qmckl_provide_ao_basis_shell_vgl(context);
-    if (rc != QMCKL_SUCCESS) {
-        return qmckl_failwith( context, rc, "qmckl_provide_ao_basis_shell_vgl", NULL);
+    if (ctx->point.alloc_date > ctx->ao_basis.ao_value_date) {
+      if (ctx->ao_basis.ao_value != NULL) {
+        rc = qmckl_free(context, ctx->ao_basis.ao_value);
+        assert (rc == QMCKL_SUCCESS);
+        ctx->ao_basis.ao_value = NULL;
+      }
     }
-#endif
 
     /* Allocate array */
     if (ctx->ao_basis.ao_value == NULL) {
 
       qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
-      mem_info.size = ctx->ao_basis.ao_num * 5 * ctx->point.num * sizeof(double);
+      mem_info.size = ctx->ao_basis.ao_num * ctx->point.num * sizeof(double);
       double* ao_value = (double*) qmckl_malloc(context, mem_info);
 
       if (ao_value == NULL) {
@@ -5973,6 +6025,9 @@ qmckl_exit_code qmckl_provide_ao_value(qmckl_context context)
       ctx->ao_basis.ao_value = ao_value;
     }
 
+#+end_src
+
+     #+begin_src c :comments org :tangle (eval c) :noweb yes  :exports none
     if (ctx->ao_basis.ao_vgl_date == ctx->point.date) {
 
       // ao_vgl has been computed at this step: Just copy the data.
@@ -6031,6 +6086,12 @@ qmckl_exit_code qmckl_provide_ao_value(qmckl_context context)
           ctx->ao_basis.ao_value);
         ,*/
       } else {
+        /* Provide required data */
+        rc = qmckl_provide_ao_basis_shell_vgl(context);
+        if (rc != QMCKL_SUCCESS) {
+            return qmckl_failwith( context, rc, "qmckl_provide_ao_basis_shell_vgl", NULL);
+        }
+
         rc = qmckl_compute_ao_value_doc(context,
                                         ctx->ao_basis.ao_num,
                                         ctx->ao_basis.shell_num,
@@ -6048,6 +6109,12 @@ qmckl_exit_code qmckl_provide_ao_value(qmckl_context context)
                                         ctx->ao_basis.ao_value);
       }
 #else
+      /* Provide required data */
+      rc = qmckl_provide_ao_basis_shell_vgl(context);
+      if (rc != QMCKL_SUCCESS) {
+          return qmckl_failwith( context, rc, "qmckl_provide_ao_basis_shell_vgl", NULL);
+      }
+
       rc = qmckl_compute_ao_value_doc(context,
                                       ctx->ao_basis.ao_num,
                                       ctx->ao_basis.shell_num,
@@ -6064,10 +6131,15 @@ qmckl_exit_code qmckl_provide_ao_value(qmckl_context context)
                                       ctx->ao_basis.shell_vgl,
                                       ctx->ao_basis.ao_value);
 #endif
-      if (rc != QMCKL_SUCCESS) {
-        return rc;
-      }
+    }
+     #+end_src
 
+#+CALL: write_provider_post( group="ao_basis", data="ao_value" )
+
+#+RESULTS:
+#+begin_src c :comments org :tangle (eval c) :noweb yes :export none
+    if (rc != QMCKL_SUCCESS) {
+      return rc;
     }
 
     ctx->ao_basis.ao_value_date = ctx->date;
@@ -6075,7 +6147,7 @@ qmckl_exit_code qmckl_provide_ao_value(qmckl_context context)
 
   return QMCKL_SUCCESS;
 }
-     #+end_src
+#+end_src
 
 **** Test                                                          :noexport:
 
diff --git a/org/qmckl_mo.org b/org/qmckl_mo.org
index f9ae16e..6d23bf0 100644
--- a/org/qmckl_mo.org
+++ b/org/qmckl_mo.org
@@ -457,7 +457,7 @@ qmckl_get_mo_basis_mo_value(qmckl_context context,
 
   qmckl_exit_code rc;
 
-  rc = qmckl_provide_ao_value(context);
+  rc = qmckl_provide_ao_basis_ao_value(context);
   if (rc != QMCKL_SUCCESS) return rc;
 
   rc = qmckl_provide_mo_value(context);
@@ -585,7 +585,7 @@ qmckl_exit_code qmckl_provide_mo_value(qmckl_context context)
                            NULL);
   }
 
-  rc = qmckl_provide_ao_value(context);
+  rc = qmckl_provide_ao_basis_ao_value(context);
   if (rc != QMCKL_SUCCESS) {
     return qmckl_failwith( context,
                            QMCKL_NOT_PROVIDED,
diff --git a/org/qmckl_point.org b/org/qmckl_point.org
index 20fabbf..6344975 100644
--- a/org/qmckl_point.org
+++ b/org/qmckl_point.org
@@ -77,11 +77,13 @@ int main() {
 
   The following data stored in the context:
 
-  | Variable | Type           | Description                               |
-  |----------+----------------+-------------------------------------------|
-  | ~num~    | ~int64_t~      | Total number of points                    |
-  | ~date~   | ~uint64_t~     | Last modification date of the coordinates |
-  | ~coord~  | ~qmckl_matrix~ | ~num~ \times 3 matrix                     |
+  | Variable     | Type           | Description                               |
+  |--------------+----------------+-------------------------------------------|
+  | ~num~        | ~int64_t~      | Total number of points                    |
+  | ~alloc_num~ | ~int64_t~      | Numer of allocated number of points       |
+  | ~date~       | ~uint64_t~     | Last modification date of the coordinates |
+  | ~alloc_date~ | ~uint64_t~     | Last modification date of the allocation  |
+  | ~coord~      | ~qmckl_matrix~ | ~num~ \times 3 matrix                     |
 
   We consider that the matrix is stored 'transposed' and 'normal'
   corresponds to the 3 \times ~num~ matrix.
@@ -91,7 +93,9 @@ int main() {
    #+begin_src c :comments org :tangle (eval h_private_type)
 typedef struct qmckl_point_struct {
   int64_t      num;
+  int64_t      alloc_num;
   uint64_t     date;
+  uint64_t     alloc_date;
   qmckl_matrix coord;
 } qmckl_point_struct;
 
@@ -308,7 +312,7 @@ qmckl_set_point (qmckl_context context,
   assert (ctx != NULL);
 
   qmckl_exit_code rc;
-  if (ctx->point.num != num) {
+  if (num > ctx->point.alloc_num) {
 
     if (ctx->point.coord.data != NULL) {
       rc = qmckl_matrix_free(context, &(ctx->point.coord));
@@ -322,7 +326,6 @@ qmckl_set_point (qmckl_context context,
                              "qmckl_set_point",
                              NULL);
     }
-
   };
 
   ctx->point.num = num;
@@ -350,6 +353,11 @@ qmckl_set_point (qmckl_context context,
   rc = qmckl_context_touch(context);
   assert (rc == QMCKL_SUCCESS);
 
+  if (num > ctx->point.alloc_num) {
+    ctx->point.alloc_num = num;
+    ctx->point.alloc_date = ctx->point.date;
+  };
+
   return QMCKL_SUCCESS;
 
 }
diff --git a/tools/lib.org b/tools/lib.org
index fd198a9..5e3d78d 100644
--- a/tools/lib.org
+++ b/tools/lib.org
@@ -4,10 +4,10 @@
 ** Defines the name of the current file
 
    #+NAME: filename
-   #+begin_src elisp :tangle no 
+   #+begin_src elisp :tangle no
 (file-name-nondirectory (substring buffer-file-name 0 -4))
    #+end_src
-  
+
 ** Function to get the value of a property.
  #+NAME: get_value
  #+begin_src elisp :var key="Type"
@@ -15,7 +15,6 @@
   (org-entry-get nil key t))
  #+end_src
 
-
 ** Table of function arguments
 
    #+NAME: test
@@ -32,7 +31,7 @@
      | ~ldb~     | ~int64_t~        | in     | Leading dimension of array ~B~                |
      | ~C~       | ~double[n][ldc]~ | out    | Array containing the $m \times n$ matrix $C$  |
      | ~ldc~     | ~int64_t~        | in     | Leading dimension of array ~C~                |
-   
+
 
 *** Fortran-C type conversions
 
@@ -124,7 +123,7 @@ for d in parse_table(table):
         const = "const "
     else:
         const = ""
-        
+
     results += [ f"      {const}{c_type} {name}" ]
 
 results=',\n'.join(results)
@@ -146,10 +145,9 @@ return template
           const double* B,
           const int64_t ldb,
           double* const C,
-          const int64_t ldc ); 
+          const int64_t ldc );
     #+end_src
 
-
 *** Generates a C interface to the Fortran function
 
     #+NAME: generate_c_interface
@@ -258,4 +256,161 @@ return results
     #+END_SRC
 
 
-    
+
+** Creating provide functions
+
+    #+NAME: write_provider_header
+    #+BEGIN_SRC python :var group="GROUP" :var data="DATA" :results drawer :noweb yes :wrap "src c :comments org :tangle (eval h_private_func) :noweb yes :export none"
+template = "qmckl_exit_code qmckl_provide_{{ group }}_{{ data }}(qmckl_context context);"
+
+msg = template.replace("{{ group }}", group) \
+              .replace("{{ data }}", data)
+return msg
+    #+END_SRC
+
+    #+RESULTS: write_provider_header
+    #+begin_src c :comments org :tangle (eval h_private_func) :noweb yes :export none
+    qmckl_exit_code qmckl_provide_GROUP_DATA(qmckl_context context);
+    #+end_src
+
+    #+NAME: write_provider_pre
+    #+BEGIN_SRC python :var group="GROUP" :var data="DATA" :var dimension="DIMENSION" :results drawer :noweb yes :wrap "src c :comments org :tangle (eval c) :noweb yes :export none"
+template = """qmckl_exit_code qmckl_provide_{{ group }}_{{ data }}(qmckl_context context)
+{
+
+  qmckl_exit_code rc;
+
+  if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
+    return qmckl_failwith( context,
+                           QMCKL_INVALID_CONTEXT,
+                           "qmckl_provide_{{ group }}_{{ data }}",
+                           NULL);
+  }
+
+  qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
+  assert (ctx != NULL);
+
+  if (!ctx->{{ group }}.provided) {
+    return qmckl_failwith( context,
+                           QMCKL_NOT_PROVIDED,
+                           "qmckl_provide_{{ group }}_{{ data }}",
+                           NULL);
+  }
+
+  /* Compute if necessary */
+  if (ctx->point.date > ctx->{{ group }}.{{ data }}_date) {
+
+    if (ctx->point.alloc_date > ctx->{{ group }}.{{ data }}_date) {
+      if (ctx->{{ group }}.{{ data }} != NULL) {
+        rc = qmckl_free(context, ctx->{{ group }}.{{ data }});
+        assert (rc == QMCKL_SUCCESS);
+        ctx->{{ group }}.{{ data }} = NULL;
+      }
+    }
+
+    /* Allocate array */
+    if (ctx->{{ group }}.{{ data }} == NULL) {
+
+      qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
+      mem_info.size = {{ dimension }} * sizeof(double);
+      double* {{ data }} = (double*) qmckl_malloc(context, mem_info);
+
+      if ({{ data }} == NULL) {
+        return qmckl_failwith( context,
+                               QMCKL_ALLOCATION_FAILED,
+                               "qmckl_{{ group }}_{{ data }}",
+                               NULL);
+      }
+      ctx->{{ group }}.{{ data }} = {{ data }};
+    }
+
+"""
+
+msg = template.replace("{{ group }}", group) \
+              .replace("{{ data }}", data) \
+              .replace("{{ dimension }}", dimension)
+return msg
+    #+END_SRC
+
+    #+RESULTS: write_provider_pre
+    #+begin_src c :comments org :tangle (eval c) :noweb yes :export none
+    qmckl_exit_code qmckl_provide_GROUP_DATA(qmckl_context context)
+    {
+
+      qmckl_exit_code rc;
+
+      if (qmckl_context_check(context) == QMCKL_NULL_CONTEXT) {
+        return qmckl_failwith( context,
+                               QMCKL_INVALID_CONTEXT,
+                               "qmckl_provide_GROUP_DATA",
+                               NULL);
+      }
+
+      qmckl_context_struct* const ctx = (qmckl_context_struct*) context;
+      assert (ctx != NULL);
+
+      if (!ctx->GROUP.provided) {
+        return qmckl_failwith( context,
+                               QMCKL_NOT_PROVIDED,
+                               "qmckl_provide_GROUP_DATA",
+                               NULL);
+      }
+
+      /* Compute if necessary */
+      if (ctx->point.date > ctx->GROUP.DATA_date) {
+
+        if (ctx->point.alloc_date > ctx->GROUP.DATA_date) {
+          rc = qmckl_free(context, ctx->GROUP.DATA);
+          assert (rc == QMCKL_SUCCESS);
+          ctx->GROUP.DATA = NULL;
+        }
+
+        /* Allocate array */
+        if (ctx->GROUP.DATA == NULL) {
+
+          qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
+          mem_info.size = DIMENSION * sizeof(double);
+          double* DATA = (double*) qmckl_malloc(context, mem_info);
+
+          if (DATA == NULL) {
+            return qmckl_failwith( context,
+                                   QMCKL_ALLOCATION_FAILED,
+                                   "qmckl_GROUP_DATA",
+                                   NULL);
+          }
+          ctx->GROUP.DATA = DATA;
+        }
+
+    #+end_src
+
+    #+NAME: write_provider_post
+    #+BEGIN_SRC python :var group="BASIS" :var data="DATA" :results drawer :noweb yes :wrap "src c :comments org :tangle (eval c) :noweb yes :export none"
+template = """    if (rc != QMCKL_SUCCESS) {
+      return rc;
+    }
+
+    ctx->{{ group }}.{{ data }}_date = ctx->date;
+  }
+
+  return QMCKL_SUCCESS;
+}
+"""
+
+msg = template.replace("{{ group }}", group) \
+              .replace("{{ data }}", data)
+
+return msg
+    #+END_SRC
+
+    #+RESULTS: write_provider_post
+    #+begin_src c :comments org :tangle (eval c) :noweb yes :export none
+        if (rc != QMCKL_SUCCESS) {
+          return rc;
+        }
+
+        ctx->BASIS.DATA_date = ctx->date;
+      }
+
+      return QMCKL_SUCCESS;
+    }
+    #+end_src

From 1641d50583d3118c82d8c5ea4a575e48ba145d92 Mon Sep 17 00:00:00 2001
From: Anthony Scemama 
Date: Mon, 11 Jul 2022 10:38:53 +0200
Subject: [PATCH 099/100] Update Makefile.am

---
 Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index ea302d3..89fa63e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -207,7 +207,7 @@ $(qmckl_include_i):  $(qmckl_h) $(process_header_py)
 
 
 $(qmckl_py): $(qmckl_i) $(qmckl_include_i)
-	swig -Iinclude -Ipython/src -python -py3 -builtin -o $(qmckl_wrap_c) $(qmckl_i)
+	$(srcdir)/tools/missing wig -Iinclude -Ipython/src -python -py3 -builtin -o $(qmckl_wrap_c) $(qmckl_i)
 
 $(qmckl_wrap_c): $(qmckl_py)
 

From 4f573a4d075cce4c96b18599d33225bf66023306 Mon Sep 17 00:00:00 2001
From: Anthony Scemama 
Date: Mon, 11 Jul 2022 10:55:47 +0200
Subject: [PATCH 100/100] If swig not found, don't fail

---
 Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index 89fa63e..827a27e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -207,7 +207,7 @@ $(qmckl_include_i):  $(qmckl_h) $(process_header_py)
 
 
 $(qmckl_py): $(qmckl_i) $(qmckl_include_i)
-	$(srcdir)/tools/missing wig -Iinclude -Ipython/src -python -py3 -builtin -o $(qmckl_wrap_c) $(qmckl_i)
+	$(srcdir)/tools/missing swig -Iinclude -Ipython/src -python -py3 -builtin -o $(qmckl_wrap_c) $(qmckl_i) || :
 
 $(qmckl_wrap_c): $(qmckl_py)