mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Fix OpenACC
This commit is contained in:
parent
72fad819bf
commit
0966e1e2b1
@ -5689,20 +5689,29 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||
|
||||
// Compute array access strides:
|
||||
// For tmp_c...
|
||||
int stride_k_c = elec_num;
|
||||
int stride_j_c = stride_k_c * nucl_num;
|
||||
int stride_i_c = stride_j_c * (cord_num+1);
|
||||
int stride_nw_c = stride_i_c * cord_num;
|
||||
const int64_t stride_k_c = elec_num;
|
||||
const int64_t stride_j_c = stride_k_c * nucl_num;
|
||||
const int64_t stride_i_c = stride_j_c * (cord_num+1);
|
||||
const int64_t stride_nw_c = stride_i_c * cord_num;
|
||||
// For een_rescaled_e...
|
||||
int stride_m_e = elec_num;
|
||||
int stride_i_e = stride_m_e * elec_num;
|
||||
int stride_nw_e = stride_i_e * (cord_num+1);
|
||||
const int64_t stride_m_e = elec_num;
|
||||
const int64_t stride_i_e = stride_m_e * elec_num;
|
||||
const int64_t stride_nw_e = stride_i_e * (cord_num+1);
|
||||
// For een_rescaled_n...
|
||||
int stride_k_n = elec_num;
|
||||
int stride_j_n = stride_k_n * nucl_num;
|
||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||
const int64_t stride_k_n = elec_num;
|
||||
const int64_t stride_j_n = stride_k_n * nucl_num;
|
||||
const int64_t stride_nw_n = stride_j_n * (cord_num+1);
|
||||
|
||||
const int64_t size_tmp_c = elec_num*nucl_num*(cord_num+1)*cord_num*walk_num;
|
||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
|
||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||
|
||||
#pragma acc parallel create(tmp_c[0:size_tmp_c]) copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
{
|
||||
#pragma acc loop independent gang worker vector
|
||||
for (int64_t i=0 ; i<size_tmp_c ; ++i)
|
||||
tmp_c[i] = 0.;
|
||||
|
||||
#pragma acc parallel
|
||||
#pragma acc loop independent gang worker vector collapse(5)
|
||||
for (int nw=0; nw < walk_num; ++nw) {
|
||||
for (int i=0; i<cord_num; ++i){
|
||||
@ -5726,6 +5735,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
@ -6107,7 +6117,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
|
||||
|
||||
#+NAME: qmckl_factor_dtmp_c_acc_offload_args
|
||||
| Variable | Type | In/Out | Description |
|
||||
|--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------|
|
||||
|--------------------------+---------------------------------------------------------------------+--------+-----------------------------------------------|
|
||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||
@ -6115,7 +6125,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
|
||||
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
|
||||
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
||||
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | out | vector of non-zero coefficients |
|
||||
|
||||
|
||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||
@ -6148,23 +6158,32 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||
|
||||
// Compute strides...
|
||||
// For dtmp_c
|
||||
int stride_l_d = elec_num;
|
||||
int stride_k_d = stride_l_d * 4;
|
||||
int stride_j_d = stride_k_d * nucl_num;
|
||||
int stride_i_d = stride_j_d * (cord_num+1);
|
||||
int stride_nw_d = stride_i_d * cord_num;
|
||||
const int64_t stride_l_d = elec_num;
|
||||
const int64_t stride_k_d = stride_l_d * 4;
|
||||
const int64_t stride_j_d = stride_k_d * nucl_num;
|
||||
const int64_t stride_i_d = stride_j_d * (cord_num+1);
|
||||
const int64_t stride_nw_d = stride_i_d * cord_num;
|
||||
// For een_rescaled_e_deriv_e
|
||||
int stride_l_e = elec_num;
|
||||
int stride_n_e = stride_l_e * 4;
|
||||
int stride_i_e = stride_n_e * elec_num;
|
||||
int stride_nw_e = stride_i_e * cord_num;
|
||||
const int64_t stride_l_e = elec_num;
|
||||
const int64_t stride_n_e = stride_l_e * 4;
|
||||
const int64_t stride_i_e = stride_n_e * elec_num;
|
||||
const int64_t stride_nw_e = stride_i_e * cord_num;
|
||||
// For een_rescaled_n
|
||||
int stride_k_n = elec_num;
|
||||
int stride_j_n = stride_k_n * nucl_num;
|
||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||
const int64_t stride_k_n = elec_num;
|
||||
const int64_t stride_j_n = stride_k_n * nucl_num;
|
||||
const int64_t stride_nw_n = stride_j_n * (cord_num+1);
|
||||
|
||||
|
||||
#pragma acc parallel
|
||||
const int64_t size_dtmp_c = walk_num*cord_num*(cord_num+1)*nucl_num*4*elec_num;
|
||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
|
||||
|
||||
#pragma acc parallel create(dtmp_c[0:size_dtmp_c]) copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
{
|
||||
#pragma acc loop independent gang worker vector
|
||||
for (int64_t i=0 ; i<size_dtmp_c ; ++i)
|
||||
dtmp_c[i] = 0.;
|
||||
|
||||
#pragma loop independent gang worker vector collapse(6)
|
||||
for (int nw=0; nw < walk_num; nw++) {
|
||||
for (int i=0; i < cord_num; i++) {
|
||||
@ -6189,6 +6208,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return QMCKL_SUCCESS;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user