1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-31 08:36:11 +01:00

Optimized Jasrow

This commit is contained in:
Anthony Scemama 2023-03-31 19:58:30 +02:00
parent e4023b426e
commit 2153cfccf6

View File

@ -2393,9 +2393,6 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_ee_hpc (
const double* asymp_jasb,
double* const factor_ee ) {
int ipar;
double x, x1, spin_fact, power_ser;
if (context == QMCKL_NULL_CONTEXT) {
return QMCKL_INVALID_CONTEXT;
}
@ -2417,11 +2414,11 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_ee_hpc (
size_t ishift = nw * elec_num * elec_num;
for (int i = 0; i < elec_num; ++i ) {
for (int j = 0; j < i; ++j) {
x = ee_distance_rescaled[j + i * elec_num + ishift];
x1 = x;
power_ser = 0.0;
spin_fact = 1.0;
ipar = 0; // index of asymp_jasb
double x = ee_distance_rescaled[j + i * elec_num + ishift];
const double x1 = x;
double power_ser = 0.0;
double spin_fact = 1.0;
int ipar = 0; // index of asymp_jasb
for (int p = 1; p < bord_num; ++p) {
x = x * x1;
@ -2812,12 +2809,6 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_ee_deriv_e_hpc(
const double* ee_distance_rescaled_deriv_e,
double* const factor_ee_deriv_e ) {
double pow_ser_g[3];
double dx[4];
double x, spin_fact, y;
double den, invden, invden2, invden3, xinv;
double lap1, lap2, lap3, third;
if (context == QMCKL_NULL_CONTEXT) {
return QMCKL_INVALID_CONTEXT;
}
@ -2843,23 +2834,21 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_ee_deriv_e_hpc(
}
}
third = 1.0 / 3.0;
const double third = 1.0 / 3.0;
for (int nw = 0; nw < walk_num; ++nw) {
for (int i = 0; i < elec_num; ++i) {
for (int j = 0; j < elec_num; ++j) {
x = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num];
if (fabs(x) < 1.0e-18) continue;
for (int ii = 0; ii < 3; ++ii){
pow_ser_g[ii] = 0.0;
}
spin_fact = 1.0;
den = 1.0 + b_vector[1] * x;
invden = 1.0 / den;
invden2 = invden * invden;
invden3 = invden2 * invden;
xinv = 1.0 / (x + 1.0e-18);
const double x0 = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num];
if (fabs(x0) < 1.0e-18) continue;
double spin_fact = 1.0;
const double den = 1.0 + b_vector[1] * x0;
const double invden = 1.0 / den;
const double invden2 = invden * invden;
const double invden3 = invden2 * invden;
const double xinv = 1.0 / (x0 + 1.0e-18);
double dx[4];
dx[0] = ee_distance_rescaled_deriv_e[0 \
+ j * 4 + i * 4 * elec_num \
+ nw * 4 * elec_num * elec_num];
@ -2877,14 +2866,15 @@ qmckl_exit_code qmckl_compute_jastrow_champ_factor_ee_deriv_e_hpc(
spin_fact = 0.5;
}
lap1 = 0.0;
lap2 = 0.0;
lap3 = 0.0;
double lap1 = 0.0;
double lap2 = 0.0;
double lap3 = 0.0;
double pow_ser_g[3] = {0., 0., 0.};
for (int ii = 0; ii < 3; ++ii) {
x = ee_distance_rescaled[j + i * elec_num + nw * elec_num * elec_num];
double x = x0;
if (fabs(x) < 1.0e-18) continue;
for (int p = 2; p < bord_num+1; ++p) {
y = p * b_vector[(p-1) + 1] * x;
const double y = p * b_vector[(p-1) + 1] * x;
pow_ser_g[ii] = pow_ser_g[ii] + y * dx[ii];
lap1 = lap1 + (p - 1) * y * xinv * dx[ii] * dx[ii];
lap2 = lap2 + y;
@ -3865,40 +3855,6 @@ end function qmckl_compute_jastrow_champ_asymp_jasa_f
end function qmckl_compute_jastrow_champ_asymp_jasa
#+end_src
#+begin_src c :comments org :tangle (eval c) :noweb yes
/*
qmckl_exit_code qmckl_compute_jastrow_champ_asymp_jasa (
const qmckl_context context,
const int64_t aord_num,
const int64_t type_nucl_num,
const double* a_vector,
double* const rescale_factor_en,
double* const asymp_jasa ) {
if (context == QMCKL_NULL_CONTEXT){
return QMCKL_INVALID_CONTEXT;
}
if (aord_num < 0) {
return QMCKL_INVALID_ARG_2;
}
for (int i = 0 ; i <= type_nucl_num; ++i) {
const double kappa_inv = 1.0 / rescale_factor_en[i];
asymp_jasa[i] = a_vector[aord_num*i] * kappa_inv / (1.0 + a_vector[1 + aord_num*i] * kappa_inv);
double x = kappa_inv;
for (int p = 1; p < aord_num; ++p){
x *= kappa_inv;
asymp_jasa[i] = asymp_jasa[i] + a_vector[p + 1 + aord_num*i] * x;
}
}
return QMCKL_SUCCESS;
}
*/
#+end_src
#+CALL: generate_c_header(table=qmckl_asymp_jasa_args,rettyp=get_value("CRetType"),fname=get_value("Name"))
#+RESULTS:
@ -5671,28 +5627,28 @@ end function qmckl_compute_een_rescaled_e_doc_f
#+begin_src c :comments org :tangle (eval c) :noweb yes
qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
const qmckl_context context,
const int64_t walk_num,
const int64_t elec_num,
const int64_t cord_num,
const double rescale_factor_ee,
const double* ee_distance,
double* const een_rescaled_e ) {
const qmckl_context context,
const int64_t walk_num,
const int64_t elec_num,
const int64_t cord_num,
const double rescale_factor_ee,
const double* ee_distance,
double* const een_rescaled_e ) {
if (context == QMCKL_NULL_CONTEXT) {
return QMCKL_INVALID_CONTEXT;
return QMCKL_INVALID_CONTEXT;
}
if (walk_num <= 0) {
return QMCKL_INVALID_ARG_2;
return QMCKL_INVALID_ARG_2;
}
if (elec_num <= 0) {
return QMCKL_INVALID_ARG_3;
return QMCKL_INVALID_ARG_3;
}
if (cord_num < 0) {
return QMCKL_INVALID_ARG_4;
return QMCKL_INVALID_ARG_4;
}
// Prepare table of exponentiated distances raised to appropriate power
@ -5702,7 +5658,6 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
een_rescaled_e[kk]= 0.0;
}
double x;
const int64_t elec_pairs = (elec_num * (elec_num - 1)) / 2;
const int64_t len_een_ij = elec_pairs * (cord_num + 1);
@ -5712,7 +5667,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
// len_een_ij = elec_pairs * (cord_num + 1);
qmckl_memory_info_struct mem_info = qmckl_memory_info_struct_zero;
mem_info.size = len_een_ij * sizeof(double);
double* een_rescaled_e_ij = (double*) qmckl_malloc(context, mem_info);
double* const restrict een_rescaled_e_ij = (double*) qmckl_malloc(context, mem_info);
if (een_rescaled_e_ij == NULL) {
return qmckl_failwith( context,
QMCKL_ALLOCATION_FAILED,
@ -5721,17 +5676,20 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
}
/*
for (int nw = 0; nw < walk_num; ++nw) {
for (int nw = 0; nw < walk_num; ++nw) {
for (int l = 0; l < (cord_num + 1); ++l) {
for (int i = 0; i < elec_num; ++i) {
for (int j = 0; j < elec_num; ++j) {
een_rescaled_e[j + i*elec_num + l*elec_num*elec_num + nw*(cord_num+1)*elec_num*elec_num]= 0.0;
}
}
for (int i = 0; i < elec_num; ++i) {
for (int j = 0; j < elec_num; ++j) {
een_rescaled_e[j + i*elec_num + l*elec_num*elec_num + nw*(cord_num+1)*elec_num*elec_num]= 0.0;
}
}
}
}
}
,*/
#ifdef HAVE_OPENMP
#pragma omp parallel for
#endif
for (int nw = 0; nw < walk_num; ++nw) {
for (int kk = 0; kk < len_een_ij; ++kk) {
@ -5745,7 +5703,7 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
for (int j = 0; j < i; ++j) {
// een_rescaled_e_ij(kk, 2) = dexp(-rescale_factor_ee * ee_distance(i, j, nw));
een_rescaled_e_ij[kk + elec_pairs] = exp(-rescale_factor_ee * \
ee_distance[j + i*elec_num + nw*(elec_num*elec_num)]);
ee_distance[j + i*elec_num + nw*(elec_num*elec_num)]);
kk += 1;
}
}
@ -5753,38 +5711,39 @@ qmckl_exit_code qmckl_compute_een_rescaled_e_hpc (
for (int l = 2; l < (cord_num+1); ++l) {
for (int k = 0; k < elec_pairs; ++k) {
// een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2)
// een_rescaled_e_ij(k, l + 1) = een_rescaled_e_ij(k, l + 1 - 1) * een_rescaled_e_ij(k, 2)
een_rescaled_e_ij[k+l*elec_pairs] = een_rescaled_e_ij[k + (l - 1)*elec_pairs] * \
een_rescaled_e_ij[k + elec_pairs];
een_rescaled_e_ij[k + elec_pairs];
}
}
// prepare the actual een table
for (int i = 0; i < elec_num; ++i){
for (int j = 0; j < elec_num; ++j) {
een_rescaled_e[j + i*elec_num + 0 + nw*(cord_num+1)*elec_num*elec_num] = 1.0;
double* const een_rescaled_e_ = &(een_rescaled_e[nw*(cord_num+1)*elec_num*elec_num]);
// prepare the actual een table
for (int i = 0; i < elec_num; ++i){
for (int j = 0; j < elec_num; ++j) {
een_rescaled_e_[j + i*elec_num] = 1.0;
}
}
}
// Up to here it should work.
for ( int l = 1; l < (cord_num+1); ++l) {
int k = 0;
for (int i = 0; i < elec_num; ++i) {
for (int j = 0; j < i; ++j) {
x = een_rescaled_e_ij[k + l*elec_pairs];
een_rescaled_e[j + i*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = x;
een_rescaled_e[i + j*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = x;
k = k + 1;
for ( int l = 1; l < (cord_num+1); ++l) {
int k = 0;
double* const een_rescaled_e__ = &(een_rescaled_e_[l*elec_num*elec_num]);
for (int i = 0; i < elec_num; ++i) {
for (int j = 0; j < i; ++j) {
const double x = een_rescaled_e_ij[k + l*elec_pairs];
een_rescaled_e__[j + i*elec_num] = x;
een_rescaled_e__[i + j*elec_num] = x;
k = k + 1;
}
}
}
}
for (int l = 0; l < (cord_num + 1); ++l) {
for (int j = 0; j < elec_num; ++j) {
een_rescaled_e[j + j*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = 0.0;
for (int l = 0; l < (cord_num + 1); ++l) {
for (int j = 0; j < elec_num; ++j) {
een_rescaled_e[j + j*elec_num + l*elec_num*elec_num + nw*elec_num*elec_num*(cord_num+1)] = 0.0;
}
}
}
}