mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2024-10-19 22:41:55 +02:00
Fix OpenACC and OpenMP implementations
This commit is contained in:
parent
7dc02571e9
commit
3cd30bc8f3
@ -5915,13 +5915,9 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
|
||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
|
||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||
|
||||
#pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
#pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
{
|
||||
#pragma acc loop independent gang worker vector
|
||||
for (int64_t i=0 ; i<size_tmp_c ; ++i)
|
||||
tmp_c[i] = 0.;
|
||||
|
||||
#pragma acc loop independent gang worker vector collapse(5)
|
||||
#pragma acc loop independent gang worker vector collapse(5)
|
||||
for (int nw=0; nw < walk_num; ++nw) {
|
||||
for (int i=0; i<cord_num; ++i){
|
||||
|
||||
@ -5938,7 +5934,6 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
|
||||
een_rescaled_e[l + m*stride_m_e + i*stride_i_e + nw*stride_nw_e] *
|
||||
een_rescaled_n[m + k*stride_k_n + j*stride_j_n + nw*stride_nw_n];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6015,13 +6010,10 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
|
||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
|
||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||
|
||||
#pragma omp parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
// WARNING This implementation seems unoptimized
|
||||
#pragma omp target map(from:tmp_c[0:size_tmp_c]) map(to:een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
{
|
||||
#pragma omp loop independent gang worker vector
|
||||
for (int64_t i=0 ; i<size_tmp_c ; ++i)
|
||||
tmp_c[i] = 0.;
|
||||
|
||||
#pragma omp loop independent gang worker vector collapse(5)
|
||||
#pragma omp teams distribute parallel for collapse(5)
|
||||
for (int nw=0; nw < walk_num; ++nw) {
|
||||
for (int i=0; i<cord_num; ++i){
|
||||
|
||||
@ -6038,7 +6030,6 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
|
||||
een_rescaled_e[l + m*stride_m_e + i*stride_i_e + nw*stride_nw_e] *
|
||||
een_rescaled_n[m + k*stride_k_n + j*stride_j_n + nw*stride_nw_n];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6471,18 +6462,13 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||
const int64_t stride_j_n = stride_k_n * nucl_num;
|
||||
const int64_t stride_nw_n = stride_j_n * (cord_num+1);
|
||||
|
||||
|
||||
const int64_t size_dtmp_c = walk_num*cord_num*(cord_num+1)*nucl_num*4*elec_num;
|
||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
|
||||
|
||||
#pragma acc parallel copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
{
|
||||
#pragma acc loop independent gang worker vector
|
||||
for (int64_t i=0 ; i<size_dtmp_c ; ++i)
|
||||
dtmp_c[i] = 0.;
|
||||
|
||||
#pragma loop independent gang worker vector collapse(6)
|
||||
#pragma acc loop independent gang worker vector collapse(6)
|
||||
for (int nw=0; nw < walk_num; nw++) {
|
||||
for (int i=0; i < cord_num; i++) {
|
||||
|
||||
@ -6579,13 +6565,11 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
|
||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
|
||||
|
||||
#pragma omp parallel copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
// WARNING This implementation seems unoptimized
|
||||
#pragma omp target map(from:dtmp_c[0:size_dtmp_c]) map(to:een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
||||
{
|
||||
#pragma omp target
|
||||
for (int64_t i=0 ; i<size_dtmp_c ; ++i)
|
||||
dtmp_c[i] = 0.;
|
||||
|
||||
#pragma loop independent gang worker vector collapse(6)
|
||||
#pragma omp teams distribute parallel for collapse(6)
|
||||
for (int nw=0; nw < walk_num; nw++) {
|
||||
for (int i=0; i < cord_num; i++) {
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user