1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-03 10:06:09 +01:00

Merge branch 'gpu' into master

This commit is contained in:
Aurélien Delval 2022-04-05 11:46:12 +02:00 committed by GitHub
commit bc43113b6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 157 additions and 218 deletions

View File

@ -2,9 +2,7 @@ name: test-build
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
x86_ubuntu:

View File

@ -2634,9 +2634,10 @@ qmckl_exit_code qmckl_finalize_basis(qmckl_context context) {
}
}
rc = QMCKL_SUCCESS;
#ifdef HAVE_HPC
rc = qmckl_finalize_basis_hpc(context);
#else
rc = QMCKL_SUCCESS;
#endif
return rc;

View File

@ -84,8 +84,8 @@ are not intended to be passed to external codes.
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
/* Private vector descriptor: a data pointer paired with a size.
 *
 * Only one `size` member is kept: the rendered hunk listed it both
 * before and after `data` (pre- and post-merge lines interleaved), and a
 * struct may not declare the same member name twice. The retained order
 * puts `data` first.
 * NOTE(review): post-merge order inferred from the diff — confirm against
 * the tangled header.
 */
typedef struct qmckl_vector {
  double* restrict data;  /* element storage */
  int64_t size;           /* presumably the number of elements in data — confirm */
} qmckl_vector;
#+end_src
@ -160,8 +160,8 @@ qmckl_vector_free( qmckl_context context,
#+begin_src c :comments org :tangle (eval h_private_type) :exports none
/* Private matrix descriptor: a data pointer paired with two extents.
 *
 * Only one `size[2]` member is kept: the rendered hunk listed it both
 * before and after `data` (pre- and post-merge lines interleaved), and a
 * struct may not declare the same member name twice. The retained order
 * puts `data` first.
 * NOTE(review): post-merge order inferred from the diff — confirm against
 * the tangled header.
 */
typedef struct qmckl_matrix {
  double* restrict data;  /* element storage */
  int64_t size[2];        /* presumably the extent along each of the two dimensions — confirm */
} qmckl_matrix;
#+end_src
@ -245,9 +245,9 @@ qmckl_matrix_free( qmckl_context context,
/* Capacity of the per-dimension size array in qmckl_tensor. */
#define QMCKL_TENSOR_ORDER_MAX 16

/* Private tensor descriptor: a data pointer, an order, and a fixed-capacity
 * array of sizes.
 *
 * Only one `data` member is kept: the rendered hunk listed it both as the
 * first and as the last member (pre- and post-merge lines interleaved), and
 * a struct may not declare the same member name twice. `data` is placed
 * first.
 * NOTE(review): post-merge order inferred from the diff — confirm against
 * the tangled header.
 */
typedef struct qmckl_tensor {
  double* restrict data;                 /* element storage */
  int64_t order;                         /* presumably the number of dimensions in use — confirm */
  int64_t size[QMCKL_TENSOR_ORDER_MAX];  /* one slot per possible dimension */
} qmckl_tensor;
#+end_src

View File

@ -875,7 +875,7 @@ qmckl_set_jastrow_type_nucl_vector(qmckl_context context,
}
if (ctx->jastrow.type_nucl_vector != NULL) {
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.type_nucl_vector);
rc = qmckl_free(context, ctx->jastrow.type_nucl_vector);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context, rc,
"qmckl_set_type_nucl_vector",
@ -934,7 +934,7 @@ qmckl_set_jastrow_aord_vector(qmckl_context context,
}
if (ctx->jastrow.aord_vector != NULL) {
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.aord_vector);
rc = qmckl_free(context, ctx->jastrow.aord_vector);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context, rc,
"qmckl_set_ord_vector",
@ -997,7 +997,7 @@ qmckl_set_jastrow_bord_vector(qmckl_context context,
}
if (ctx->jastrow.bord_vector != NULL) {
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.bord_vector);
rc = qmckl_free(context, ctx->jastrow.bord_vector);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context, rc,
"qmckl_set_ord_vector",
@ -1067,7 +1067,7 @@ qmckl_set_jastrow_cord_vector(qmckl_context context,
}
if (ctx->jastrow.cord_vector != NULL) {
qmckl_exit_code rc = qmckl_free(context, ctx->jastrow.cord_vector);
rc = qmckl_free(context, ctx->jastrow.cord_vector);
if (rc != QMCKL_SUCCESS) {
return qmckl_failwith( context, rc,
"qmckl_set_ord_vector",
@ -1434,8 +1434,7 @@ qmckl_exit_code qmckl_provide_asymp_jasb(qmckl_context context)
ctx->jastrow.asymp_jasb = asymp_jasb;
}
qmckl_exit_code rc =
qmckl_compute_asymp_jasb(context,
rc = qmckl_compute_asymp_jasb(context,
ctx->jastrow.bord_num,
ctx->jastrow.bord_vector,
rescale_factor_kappa_ee,
@ -1516,10 +1515,6 @@ qmckl_exit_code qmckl_compute_asymp_jasb (
const double rescale_factor_kappa_ee,
double* const asymp_jasb ) {
double kappa_inv, x, asym_one;
kappa_inv = 1.0 / rescale_factor_kappa_ee;
if (context == QMCKL_NULL_CONTEXT){
return QMCKL_INVALID_CONTEXT;
}
@ -1528,14 +1523,15 @@ qmckl_exit_code qmckl_compute_asymp_jasb (
return QMCKL_INVALID_ARG_2;
}
asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv);
const double kappa_inv = 1.0 / rescale_factor_kappa_ee;
const double asym_one = bord_vector[0] * kappa_inv / (1.0 + bord_vector[1] * kappa_inv);
asymp_jasb[0] = asym_one;
asymp_jasb[1] = 0.5 * asym_one;
for (int i = 0 ; i <= 1; ++i) {
x = kappa_inv;
double x = kappa_inv;
for (int p = 1; p < bord_num; ++p){
x = x * kappa_inv;
x *= kappa_inv;
asymp_jasb[i] = asymp_jasb[i] + bord_vector[p + 1] * x;
}
}
@ -1718,8 +1714,7 @@ qmckl_exit_code qmckl_provide_factor_ee(qmckl_context context)
ctx->jastrow.factor_ee = factor_ee;
}
qmckl_exit_code rc =
qmckl_compute_factor_ee(context,
rc = qmckl_compute_factor_ee(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->electron.up_num,
@ -2060,8 +2055,7 @@ qmckl_exit_code qmckl_provide_factor_ee_deriv_e(qmckl_context context)
ctx->jastrow.factor_ee_deriv_e = factor_ee_deriv_e;
}
qmckl_exit_code rc =
qmckl_compute_factor_ee_deriv_e(context,
rc = qmckl_compute_factor_ee_deriv_e(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->electron.up_num,
@ -2483,8 +2477,7 @@ qmckl_exit_code qmckl_provide_factor_en(qmckl_context context)
ctx->jastrow.factor_en = factor_en;
}
qmckl_exit_code rc =
qmckl_compute_factor_en(context,
rc = qmckl_compute_factor_en(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->nucleus.num,
@ -2831,8 +2824,7 @@ qmckl_exit_code qmckl_provide_factor_en_deriv_e(qmckl_context context)
ctx->jastrow.factor_en_deriv_e = factor_en_deriv_e;
}
qmckl_exit_code rc =
qmckl_compute_factor_en_deriv_e(context,
rc = qmckl_compute_factor_en_deriv_e(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->nucleus.num,
@ -3250,8 +3242,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e(qmckl_context context)
ctx->jastrow.een_rescaled_e = een_rescaled_e;
}
qmckl_exit_code rc =
qmckl_compute_een_rescaled_e(context,
rc = qmckl_compute_een_rescaled_e(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->jastrow.cord_num,
@ -3584,8 +3575,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_e_deriv_e(qmckl_context context)
ctx->jastrow.een_rescaled_e_deriv_e = een_rescaled_e_deriv_e;
}
qmckl_exit_code rc =
qmckl_compute_factor_een_rescaled_e_deriv_e(context,
rc = qmckl_compute_factor_een_rescaled_e_deriv_e(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->jastrow.cord_num,
@ -3964,8 +3954,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_n(qmckl_context context)
ctx->jastrow.een_rescaled_n = een_rescaled_n;
}
qmckl_exit_code rc =
qmckl_compute_een_rescaled_n(context,
rc = qmckl_compute_een_rescaled_n(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->nucleus.num,
@ -4303,8 +4292,7 @@ qmckl_exit_code qmckl_provide_een_rescaled_n_deriv_e(qmckl_context context)
ctx->jastrow.een_rescaled_n_deriv_e = een_rescaled_n_deriv_e;
}
qmckl_exit_code rc =
qmckl_compute_factor_een_rescaled_n_deriv_e(context,
rc = qmckl_compute_factor_een_rescaled_n_deriv_e(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->nucleus.num,
@ -4805,8 +4793,7 @@ qmckl_exit_code qmckl_provide_cord_vect_full(qmckl_context context)
ctx->jastrow.cord_vect_full = cord_vect_full;
}
qmckl_exit_code rc =
qmckl_compute_cord_vect_full(context,
rc = qmckl_compute_cord_vect_full(context,
ctx->nucleus.num,
ctx->jastrow.dim_cord_vect,
ctx->jastrow.type_nucl_num,
@ -4856,8 +4843,7 @@ qmckl_exit_code qmckl_provide_lkpm_combined_index(qmckl_context context)
ctx->jastrow.lkpm_combined_index = lkpm_combined_index;
}
qmckl_exit_code rc =
qmckl_compute_lkpm_combined_index(context,
rc = qmckl_compute_lkpm_combined_index(context,
ctx->jastrow.cord_num,
ctx->jastrow.dim_cord_vect,
ctx->jastrow.lkpm_combined_index);
@ -4942,9 +4928,7 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
}
#endif
if(default_compute) {
qmckl_exit_code rc =
qmckl_compute_tmp_c(context,
rc = qmckl_compute_tmp_c(context,
ctx->jastrow.cord_num,
ctx->electron.num,
ctx->nucleus.num,
@ -4955,7 +4939,6 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
if (rc != QMCKL_SUCCESS) {
return rc;
}
}
ctx->jastrow.tmp_c_date = ctx->date;
@ -4997,48 +4980,8 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
ctx->jastrow.dtmp_c = dtmp_c;
}
/* Choose the correct compute function (depending on offload type) */
bool default_compute = true;
#ifdef HAVE_OPENACC_OFFLOAD
if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) {
qmckl_exit_code rc =
qmckl_compute_dtmp_c_acc_offload(context,
ctx->jastrow.cord_num,
ctx->electron.num,
ctx->nucleus.num,
ctx->electron.walk_num,
ctx->jastrow.een_rescaled_e_deriv_e,
ctx->jastrow.een_rescaled_n,
ctx->jastrow.dtmp_c);
default_compute = false;
if (rc != QMCKL_SUCCESS) {
return rc;
}
}
#endif
#ifdef HAVE_CUBLAS_OFFLOAD
if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) {
qmckl_exit_code rc =
qmckl_compute_dtmp_c_cublas_offload(context,
ctx->jastrow.cord_num,
ctx->electron.num,
ctx->nucleus.num,
ctx->electron.walk_num,
ctx->jastrow.een_rescaled_e_deriv_e,
ctx->jastrow.een_rescaled_n,
ctx->jastrow.dtmp_c);
default_compute = false;
if (rc != QMCKL_SUCCESS) {
return rc;
}
}
#endif
if(default_compute) {
qmckl_exit_code rc =
qmckl_compute_dtmp_c(context,
rc = qmckl_compute_dtmp_c(context,
ctx->jastrow.cord_num,
ctx->electron.num,
ctx->nucleus.num,
@ -5049,7 +4992,6 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
if (rc != QMCKL_SUCCESS) {
return rc;
}
}
ctx->jastrow.dtmp_c_date = ctx->date;
}
@ -6509,8 +6451,7 @@ qmckl_exit_code qmckl_provide_factor_een(qmckl_context context)
ctx->jastrow.factor_een = factor_een;
}
qmckl_exit_code rc =
qmckl_compute_factor_een(context,
rc = qmckl_compute_factor_een(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->nucleus.num,
@ -7022,8 +6963,7 @@ qmckl_exit_code qmckl_provide_factor_een_deriv_e(qmckl_context context)
ctx->jastrow.factor_een_deriv_e = factor_een_deriv_e;
}
qmckl_exit_code rc =
qmckl_compute_factor_een_deriv_e(context,
rc = qmckl_compute_factor_een_deriv_e(context,
ctx->electron.walk_num,
ctx->electron.num,
ctx->nucleus.num,

View File

@ -849,13 +849,13 @@ qmckl_compute_mo_basis_mo_vgl_hpc (const qmckl_context context,
int64_t n0 = nidx-4;
n0 = n0 < 0 ? 0 : n0;
for (int64_t n=n0 ; n < nidx ; n+=1) {
const double* restrict ck = coef_normalized_t + idx[n]*mo_num;
const double a1 = av1[n];
const double a2 = av2[n];
const double a3 = av3[n];
const double a4 = av4[n];
const double a5 = av5[n];
for (int64_t m=n0 ; m < nidx ; m+=1) {
const double* restrict ck = coef_normalized_t + idx[m]*mo_num;
const double a1 = av1[m];
const double a2 = av2[m];
const double a3 = av3[m];
const double a4 = av4[m];
const double a5 = av5[m];
#ifdef HAVE_OPENMP
#pragma omp simd

View File

@ -965,7 +965,7 @@ qmckl_exit_code qmckl_sherman_morrison_smw32s(const qmckl_context context,
rc = qmckl_woodbury_3(context, LDS, Dim, Updates_3block, Updates_index_3block, breakdown, Slater_inv, determinant);
if (rc != 0) { // Send the entire block to slagel_splitting
uint64_t l = 0;
rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
(void) qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block, Updates_index_3block,
breakdown, Slater_inv, later_updates + (Dim * later), later_index + later, &l, determinant);
later = later + l;
}