mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-12-22 20:36:01 +01:00
Fix OpenACC
This commit is contained in:
parent
72fad819bf
commit
0966e1e2b1
@ -5689,20 +5689,29 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
|||||||
|
|
||||||
// Compute array access strides:
|
// Compute array access strides:
|
||||||
// For tmp_c...
|
// For tmp_c...
|
||||||
int stride_k_c = elec_num;
|
const int64_t stride_k_c = elec_num;
|
||||||
int stride_j_c = stride_k_c * nucl_num;
|
const int64_t stride_j_c = stride_k_c * nucl_num;
|
||||||
int stride_i_c = stride_j_c * (cord_num+1);
|
const int64_t stride_i_c = stride_j_c * (cord_num+1);
|
||||||
int stride_nw_c = stride_i_c * cord_num;
|
const int64_t stride_nw_c = stride_i_c * cord_num;
|
||||||
// For een_rescaled_e...
|
// For een_rescaled_e...
|
||||||
int stride_m_e = elec_num;
|
const int64_t stride_m_e = elec_num;
|
||||||
int stride_i_e = stride_m_e * elec_num;
|
const int64_t stride_i_e = stride_m_e * elec_num;
|
||||||
int stride_nw_e = stride_i_e * (cord_num+1);
|
const int64_t stride_nw_e = stride_i_e * (cord_num+1);
|
||||||
// For een_rescaled_n...
|
// For een_rescaled_n...
|
||||||
int stride_k_n = elec_num;
|
const int64_t stride_k_n = elec_num;
|
||||||
int stride_j_n = stride_k_n * nucl_num;
|
const int64_t stride_j_n = stride_k_n * nucl_num;
|
||||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
const int64_t stride_nw_n = stride_j_n * (cord_num+1);
|
||||||
|
|
||||||
|
const int64_t size_tmp_c = elec_num*nucl_num*(cord_num+1)*cord_num*walk_num;
|
||||||
|
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
|
||||||
|
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||||
|
|
||||||
|
#pragma acc parallel create(tmp_c[0:size_tmp_c]) copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
|
||||||
|
{
|
||||||
|
#pragma acc loop independent gang worker vector
|
||||||
|
for (int64_t i=0 ; i<size_tmp_c ; ++i)
|
||||||
|
tmp_c[i] = 0.;
|
||||||
|
|
||||||
#pragma acc parallel
|
|
||||||
#pragma acc loop independent gang worker vector collapse(5)
|
#pragma acc loop independent gang worker vector collapse(5)
|
||||||
for (int nw=0; nw < walk_num; ++nw) {
|
for (int nw=0; nw < walk_num; ++nw) {
|
||||||
for (int i=0; i<cord_num; ++i){
|
for (int i=0; i<cord_num; ++i){
|
||||||
@ -5726,6 +5735,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return QMCKL_SUCCESS;
|
return QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -6107,7 +6117,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
|
|||||||
|
|
||||||
#+NAME: qmckl_factor_dtmp_c_acc_offload_args
|
#+NAME: qmckl_factor_dtmp_c_acc_offload_args
|
||||||
| Variable | Type | In/Out | Description |
|
| Variable | Type | In/Out | Description |
|
||||||
|--------------------------+------------------------------------------------------------------+--------+-----------------------------------------------|
|
|--------------------------+---------------------------------------------------------------------+--------+-----------------------------------------------|
|
||||||
| ~context~ | ~qmckl_context~ | in | Global state |
|
| ~context~ | ~qmckl_context~ | in | Global state |
|
||||||
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
| ~cord_num~ | ~int64_t~ | in | Order of polynomials |
|
||||||
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
| ~elec_num~ | ~int64_t~ | in | Number of electrons |
|
||||||
@ -6115,7 +6125,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_hpc (
|
|||||||
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
| ~walk_num~ | ~int64_t~ | in | Number of walkers |
|
||||||
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
|
| ~een_rescaled_e_deriv_e~ | ~double[walk_num][0:cord_num][elec_num][4][elec_num]~ | in | Electron-electron rescaled factor derivatives |
|
||||||
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
| ~een_rescaled_n~ | ~double[walk_num][0:cord_num][nucl_num][elec_num]~ | in | Electron-nucleus rescaled factor |
|
||||||
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][elec_num]~ | out | vector of non-zero coefficients |
|
| ~dtmp_c~ | ~double[walk_num][0:cord_num-1][0:cord_num][nucl_num][4][elec_num]~ | out | vector of non-zero coefficients |
|
||||||
|
|
||||||
|
|
||||||
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
#+begin_src c :comments org :tangle (eval c) :noweb yes
|
||||||
@ -6148,23 +6158,32 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
|||||||
|
|
||||||
// Compute strides...
|
// Compute strides...
|
||||||
// For dtmp_c
|
// For dtmp_c
|
||||||
int stride_l_d = elec_num;
|
const int64_t stride_l_d = elec_num;
|
||||||
int stride_k_d = stride_l_d * 4;
|
const int64_t stride_k_d = stride_l_d * 4;
|
||||||
int stride_j_d = stride_k_d * nucl_num;
|
const int64_t stride_j_d = stride_k_d * nucl_num;
|
||||||
int stride_i_d = stride_j_d * (cord_num+1);
|
const int64_t stride_i_d = stride_j_d * (cord_num+1);
|
||||||
int stride_nw_d = stride_i_d * cord_num;
|
const int64_t stride_nw_d = stride_i_d * cord_num;
|
||||||
// For een_rescaled_e_deriv_e
|
// For een_rescaled_e_deriv_e
|
||||||
int stride_l_e = elec_num;
|
const int64_t stride_l_e = elec_num;
|
||||||
int stride_n_e = stride_l_e * 4;
|
const int64_t stride_n_e = stride_l_e * 4;
|
||||||
int stride_i_e = stride_n_e * elec_num;
|
const int64_t stride_i_e = stride_n_e * elec_num;
|
||||||
int stride_nw_e = stride_i_e * cord_num;
|
const int64_t stride_nw_e = stride_i_e * cord_num;
|
||||||
// For een_rescaled_n
|
// For een_rescaled_n
|
||||||
int stride_k_n = elec_num;
|
const int64_t stride_k_n = elec_num;
|
||||||
int stride_j_n = stride_k_n * nucl_num;
|
const int64_t stride_j_n = stride_k_n * nucl_num;
|
||||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
const int64_t stride_nw_n = stride_j_n * (cord_num+1);
|
||||||
|
|
||||||
|
|
||||||
#pragma acc parallel
|
const int64_t size_dtmp_c = walk_num*cord_num*(cord_num+1)*nucl_num*4*elec_num;
|
||||||
|
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||||
|
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
|
||||||
|
|
||||||
|
#pragma acc parallel create(dtmp_c[0:size_dtmp_c]) copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
||||||
|
{
|
||||||
|
#pragma acc loop independent gang worker vector
|
||||||
|
for (int64_t i=0 ; i<size_dtmp_c ; ++i)
|
||||||
|
dtmp_c[i] = 0.;
|
||||||
|
|
||||||
#pragma loop independent gang worker vector collapse(6)
|
#pragma loop independent gang worker vector collapse(6)
|
||||||
for (int nw=0; nw < walk_num; nw++) {
|
for (int nw=0; nw < walk_num; nw++) {
|
||||||
for (int i=0; i < cord_num; i++) {
|
for (int i=0; i < cord_num; i++) {
|
||||||
@ -6189,6 +6208,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return QMCKL_SUCCESS;
|
return QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user