mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-03 10:06:09 +01:00
Merge branch 'gpu' into master
This commit is contained in:
commit
bc43113b6f
2
.github/workflows/test-build.yml
vendored
2
.github/workflows/test-build.yml
vendored
@ -2,9 +2,7 @@ name: test-build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
x86_ubuntu:
|
||||
|
@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
|
||||
}
|
||||
}
|
||||
|
||||
rc = QMCKL_SUCCESS;
|
||||
#ifdef HAVE_HPC
|
||||
rc = qmckl_finalize_basis_hpc(context);
|
||||
#else
|
||||
rc = QMCKL_SUCCESS;
|
||||
#endif
|
||||
|
||||
return rc;
|
||||
|
@ -84,8 +84,8 @@ are not intended to be passed to external codes.
|
||||
|
||||
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
|
||||
typedef struct qmckl_vector {
|
||||
int64_t size;
|
||||
double* restrict data;
|
||||
int64_t size;
|
||||
} qmckl_vector;
|
||||
#+end_src
|
||||
|
||||
@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context,
|
||||
|
||||
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
|
||||
typedef struct qmckl_matrix {
|
||||
int64_t size[2];
|
||||
double* restrict data;
|
||||
int64_t size[2];
|
||||
} qmckl_matrix;
|
||||
#+end_src
|
||||
|
||||
@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context,
|
||||
#define QMCKL_TENSOR_ORDER_MAX 16
|
||||
|
||||
typedef struct qmckl_tensor {
|
||||
double* restrict data;
|
||||
int64_t order;
|
||||
int64_t size[QMCKL_TENSOR_ORDER_MAX];
|
||||
double* restrict data;
|
||||
} qmckl_tensor;
|
||||
#+end_src
|
||||
|
||||
|
@ -875,7 +875,7 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context,
|
||||
}
|
||||
|
||||
if (ctx->jastrow.type_nucl_vector != NULL) {
|
||||
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.type_nucl_vector);
|
||||
rc = qmckl_free(context, ctx->jastrow.type_nucl_vector);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return qmckl_failwith( context, rc,
|
||||
"qmckl_set_type_nucl_vector",
|
||||
@ -934,7 +934,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context,
|
||||
}
|
||||
|
||||
if (ctx->jastrow.aord_vector != NULL) {
|
||||
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.aord_vector);
|
||||
rc = qmckl_free(context, ctx->jastrow.aord_vector);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return qmckl_failwith( context, rc,
|
||||
"qmckl_set_ord_vector",
|
||||
@ -997,7 +997,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context,
|
||||
}
|
||||
|
||||
if (ctx->jastrow.bord_vector != NULL) {
|
||||
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.bord_vector);
|
||||
rc = qmckl_free(context, ctx->jastrow.bord_vector);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return qmckl_failwith( context, rc,
|
||||
"qmckl_set_ord_vector",
|
||||
@ -1067,7 +1067,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context,
|
||||
}
|
||||
|
||||
if (ctx->jastrow.cord_vector != NULL) {
|
||||
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.cord_vector);
|
||||
rc = qmckl_free(context, ctx->jastrow.cord_vector);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return qmckl_failwith( context, rc,
|
||||
"qmckl_set_ord_vector",
|
||||
@ -1434,8 +1434,7 @@ qmckl_exit_code qmckl_provide_asymp_jasb(qmckl_context context)
|
||||
ctx->jastrow.asymp_jasb = asymp_jasb;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_asymp_jasb(context,
|
||||
rc = qmckl_compute_asymp_jasb(context,
|
||||
ctx->jastrow.bord_num,
|
||||
ctx->jastrow.bord_vector,
|
||||
rescale_factor_kappa_ee,
|
||||
@ -1516,10 +1515,6 @@ qmckl_exit_code qmckl_compute_asymp_jasb (
|
||||
const double rescale_factor_kappa_ee,
|
||||
double* const asymp_jasb ) {
|
||||
|
||||
double kappa_inv, x, asym_one;
|
||||
|
||||
kappa_inv = 1.0 / rescale_factor_kappa_ee;
|
||||
|
||||
if (context == QMCKL_NULL_CONTEXT){
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
@ -1528,14 +1523,15 @@ qmckl_exit_code qmckl_compute_asymp_jasb (
|
||||
return QMCKL_INVALID_ARG_2;
|
||||
}
|
||||
|
||||
asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv);
|
||||
const double kappa_inv = 1.0 / rescale_factor_kappa_ee;
|
||||
const double asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv);
|
||||
asymp_jasb[0] = asym_one;
|
||||
asymp_jasb[1] = 0.5 * asym_one;
|
||||
|
||||
for (int i = 0 ; i <= 1; ++i) {
|
||||
x = kappa_inv;
|
||||
double x = kappa_inv;
|
||||
for (int p = 1; p < bord_num; ++p){
|
||||
x = x * kappa_inv;
|
||||
x *= kappa_inv;
|
||||
asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x;
|
||||
}
|
||||
}
|
||||
@ -1718,8 +1714,7 @@ qmckl_exit_code qmckl_provide_factor_ee(qmckl_context context)
|
||||
ctx->jastrow.factor_ee = factor_ee;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_ee(context,
|
||||
rc = qmckl_compute_factor_ee(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->electron.up_num,
|
||||
@ -2060,8 +2055,7 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context)
|
||||
ctx->jastrow.factor_ee_deriv_e = factor_ee_deriv_e;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_ee_deriv_e(context,
|
||||
rc = qmckl_compute_factor_ee_deriv_e(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->electron.up_num,
|
||||
@ -2483,8 +2477,7 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context)
|
||||
ctx->jastrow.factor_en = factor_en;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_en(context,
|
||||
rc = qmckl_compute_factor_en(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
@ -2831,8 +2824,7 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context)
|
||||
ctx->jastrow.factor_en_deriv_e = factor_en_deriv_e;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_en_deriv_e(context,
|
||||
rc = qmckl_compute_factor_en_deriv_e(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
@ -3250,8 +3242,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context)
|
||||
ctx->jastrow.een_rescaled_e = een_rescaled_e;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_een_rescaled_e(context,
|
||||
rc = qmckl_compute_een_rescaled_e(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->jastrow.cord_num,
|
||||
@ -3584,8 +3575,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context)
|
||||
ctx->jastrow.een_rescaled_e_deriv_e = een_rescaled_e_deriv_e;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_een_rescaled_e_deriv_e(context,
|
||||
rc = qmckl_compute_factor_een_rescaled_e_deriv_e(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->jastrow.cord_num,
|
||||
@ -3964,8 +3954,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context)
|
||||
ctx->jastrow.een_rescaled_n = een_rescaled_n;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_een_rescaled_n(context,
|
||||
rc = qmckl_compute_een_rescaled_n(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
@ -4303,8 +4292,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context)
|
||||
ctx->jastrow.een_rescaled_n_deriv_e = een_rescaled_n_deriv_e;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_een_rescaled_n_deriv_e(context,
|
||||
rc = qmckl_compute_factor_een_rescaled_n_deriv_e(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
@ -4805,8 +4793,7 @@ qmckl_exit_code qmckl_provide_cord_vect_full(qmckl_context context)
|
||||
ctx->jastrow.cord_vect_full = cord_vect_full;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_cord_vect_full(context,
|
||||
rc = qmckl_compute_cord_vect_full(context,
|
||||
ctx->nucleus.num,
|
||||
ctx->jastrow.dim_cord_vect,
|
||||
ctx->jastrow.type_nucl_num,
|
||||
@ -4856,8 +4843,7 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context)
|
||||
ctx->jastrow.lkpm_combined_index = lkpm_combined_index;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_lkpm_combined_index(context,
|
||||
rc = qmckl_compute_lkpm_combined_index(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->jastrow.dim_cord_vect,
|
||||
ctx->jastrow.lkpm_combined_index);
|
||||
@ -4942,9 +4928,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||
}
|
||||
#endif
|
||||
|
||||
if(default_compute) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_tmp_c(context,
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
@ -4955,7 +4939,6 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ctx->jastrow.tmp_c_date = ctx->date;
|
||||
@ -4997,48 +4980,8 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
||||
ctx->jastrow.dtmp_c = dtmp_c;
|
||||
}
|
||||
|
||||
/* Choose the correct compute function (depending on offload type) */
|
||||
bool default_compute = true;
|
||||
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_dtmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
default_compute = false;
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_dtmp_c_cublas_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
default_compute = false;
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if(default_compute) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_dtmp_c(context,
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
@ -5049,7 +4992,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
ctx->jastrow.dtmp_c_date = ctx->date;
|
||||
}
|
||||
@ -6509,8 +6451,7 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context)
|
||||
ctx->jastrow.factor_een = factor_een;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_een(context,
|
||||
rc = qmckl_compute_factor_een(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
@ -7022,8 +6963,7 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context)
|
||||
ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e;
|
||||
}
|
||||
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_factor_een_deriv_e(context,
|
||||
rc = qmckl_compute_factor_een_deriv_e(context,
|
||||
ctx->electron.walk_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
|
@ -849,13 +849,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
|
||||
|
||||
int64_t n0 = nidx-4;
|
||||
n0 = n0 < 0 ? 0 : n0;
|
||||
for (int64_t n=n0 ; n < nidx ; n+=1) {
|
||||
const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
|
||||
const double a1 = av1[n];
|
||||
const double a2 = av2[n];
|
||||
const double a3 = av3[n];
|
||||
const double a4 = av4[n];
|
||||
const double a5 = av5[n];
|
||||
for (int64_t m=n0 ; m < nidx ; m+=1) {
|
||||
const double* restrict ck = coef_normalized_t + idx[m]*mo_num;
|
||||
const double a1 = av1[m];
|
||||
const double a2 = av2[m];
|
||||
const double a3 = av3[m];
|
||||
const double a4 = av4[m];
|
||||
const double a5 = av5[m];
|
||||
|
||||
#ifdef HAVE_OPENMP
|
||||
#pragma omp simd
|
||||
|
@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
|
||||
rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant);
|
||||
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||
uint64_t l = 0;
|
||||
rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
|
||||
(void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
|
||||
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
|
||||
later = later + l;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user