1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 20:36:01 +01:00

Fix OpenACC

This commit is contained in:
Anthony Scemama 2022-04-06 10:42:00 +02:00
parent 72fad819bf
commit 0966e1e2b1

View File

@ -5689,20 +5689,29 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
// Compute array access strides:
// For tmp_c...
int stride_k_c = elec_num;
int stride_j_c = stride_k_c * nucl_num;
int stride_i_c = stride_j_c * (cord_num+1);
int stride_nw_c = stride_i_c * cord_num;
const int64_t stride_k_c = elec_num;
const int64_t stride_j_c = stride_k_c * nucl_num;
const int64_t stride_i_c = stride_j_c * (cord_num+1);
const int64_t stride_nw_c = stride_i_c * cord_num;
// For een_rescaled_e...
int stride_m_e = elec_num;
int stride_i_e = stride_m_e * elec_num;
int stride_nw_e = stride_i_e * (cord_num+1);
const int64_t stride_m_e = elec_num;
const int64_t stride_i_e = stride_m_e * elec_num;
const int64_t stride_nw_e = stride_i_e * (cord_num+1);
// For een_rescaled_n...
int stride_k_n = elec_num;
int stride_j_n = stride_k_n * nucl_num;
int stride_nw_n = stride_j_n * (cord_num+1);
const int64_t stride_k_n = elec_num;
const int64_t stride_j_n = stride_k_n * nucl_num;
const int64_t stride_nw_n = stride_j_n * (cord_num+1);
const int64_t size_tmp_c = elec_num*nucl_num*(cord_num+1)*cord_num*walk_num;
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
#pragma acc parallel create(tmp_c[0:size_tmp_c]) copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
{
#pragma acc loop independent gang worker vector
for (int64_t i=0 ; i<size_tmp_c ; ++i)
tmp_c[i] = 0.;
#pragma acc parallel
#pragma acc loop independent gang worker vector collapse(5)
for (int nw=0; nw < walk_num; ++nw) {
for (int i=0; i<cord_num; ++i){
@ -5726,6 +5735,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
}
}
}
}
return QMCKL_SUCCESS;
}
@ -6107,7 +6117,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
#+NAME: qmckl_factor_dtmp_c_acc_offload_args
| Variable | Type | In/Out | Description |
|--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------|
|--------------------------+---------------------------------------------------------------------+--------+-----------------------------------------------|
| ~context~ | ~qmckl_context~ | in | Global state |
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
@ -6115,7 +6125,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | out | Vector of non-zero coefficients |
#+begin_src c :comments org :tangle (eval c) :noweb yes
@ -6148,23 +6158,32 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
// Compute strides...
// For dtmp_c
int stride_l_d = elec_num;
int stride_k_d = stride_l_d * 4;
int stride_j_d = stride_k_d * nucl_num;
int stride_i_d = stride_j_d * (cord_num+1);
int stride_nw_d = stride_i_d * cord_num;
const int64_t stride_l_d = elec_num;
const int64_t stride_k_d = stride_l_d * 4;
const int64_t stride_j_d = stride_k_d * nucl_num;
const int64_t stride_i_d = stride_j_d * (cord_num+1);
const int64_t stride_nw_d = stride_i_d * cord_num;
// For een_rescaled_e_deriv_e
int stride_l_e = elec_num;
int stride_n_e = stride_l_e * 4;
int stride_i_e = stride_n_e * elec_num;
int stride_nw_e = stride_i_e * cord_num;
const int64_t stride_l_e = elec_num;
const int64_t stride_n_e = stride_l_e * 4;
const int64_t stride_i_e = stride_n_e * elec_num;
const int64_t stride_nw_e = stride_i_e * cord_num;
// For een_rescaled_n
int stride_k_n = elec_num;
int stride_j_n = stride_k_n * nucl_num;
int stride_nw_n = stride_j_n * (cord_num+1);
const int64_t stride_k_n = elec_num;
const int64_t stride_j_n = stride_k_n * nucl_num;
const int64_t stride_nw_n = stride_j_n * (cord_num+1);
#pragma acc parallel
const int64_t size_dtmp_c = walk_num*cord_num*(cord_num+1)*nucl_num*4*elec_num;
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
#pragma acc parallel create(dtmp_c[0:size_dtmp_c]) copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
{
#pragma acc loop independent gang worker vector
for (int64_t i=0 ; i<size_dtmp_c ; ++i)
dtmp_c[i] = 0.;
#pragma loop independent gang worker vector collapse(6)
for (int nw=0; nw < walk_num; nw++) {
for (int i=0; i < cord_num; i++) {
@ -6189,6 +6208,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
}
}
}
}
return QMCKL_SUCCESS;
}