1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2025-01-08 20:33:40 +01:00

First working OpenMP version

This commit is contained in:
Anthony Scemama 2022-04-06 17:58:05 +02:00
parent aeec721774
commit d1dc35eaa4

View File

@ -5869,10 +5869,6 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
#pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n]) #pragma acc parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
{ {
#pragma acc loop independent gang worker vector
for (int64_t i=0 ; i<size_tmp_c ; ++i)
tmp_c[i] = 0.;
#pragma acc loop independent gang worker vector collapse(5) #pragma acc loop independent gang worker vector collapse(5)
for (int nw=0; nw < walk_num; ++nw) { for (int nw=0; nw < walk_num; ++nw) {
for (int i=0; i<cord_num; ++i){ for (int i=0; i<cord_num; ++i){
@ -5883,7 +5879,7 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (const qmckl_context context,
for (int l=0; l<elec_num; l++) { for (int l=0; l<elec_num; l++) {
// Single reduction // Single reduction
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = 0; tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = 0.;
for (int m=0; m<elec_num; m++) { for (int m=0; m<elec_num; m++) {
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] =
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] + tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] +
@ -5967,13 +5963,11 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num; const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num; const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
for (int64_t i=0 ; i<size_tmp_c ; ++i)
tmp_c[i] = 0.;
#pragma omp target data map(to:een_rescaled_e[0:size_e], \ #pragma omp target teams distribute parallel for collapse(5) \
map(to:een_rescaled_e[0:size_e], \
een_rescaled_n[0:size_n]) \ een_rescaled_n[0:size_n]) \
map(tofrom:tmp_c[0:size_tmp_c]) map(from:tmp_c[0:size_tmp_c])
#pragma omp target teams distribute parallel for collapse(5)
for (int nw=0; nw < walk_num; ++nw) { for (int nw=0; nw < walk_num; ++nw) {
for (int i=0; i<cord_num; ++i){ for (int i=0; i<cord_num; ++i){
@ -5983,7 +5977,7 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
for (int l=0; l<elec_num; l++) { for (int l=0; l<elec_num; l++) {
// Single reduction // Single reduction
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = 0; tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = 0.;
for (int m=0; m<elec_num; m++) { for (int m=0; m<elec_num; m++) {
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] =
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] + tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] +
@ -6491,10 +6485,10 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num; const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
#pragma omp target data map(to:een_rescaled_e_deriv_e[0:size_e], \ #pragma omp target teams distribute parallel for collapse(6) \
map(to:een_rescaled_e_deriv_e[0:size_e], \
een_rescaled_n[0:size_n]), \ een_rescaled_n[0:size_n]), \
map(tofrom:dtmp_c[0:size_dtmp_c]) map(tofrom:dtmp_c[0:size_dtmp_c])
#pragma omp target teams distribute parallel for collapse(6)
for (int nw=0; nw < walk_num; nw++) { for (int nw=0; nw < walk_num; nw++) {
for (int i=0; i < cord_num; i++) { for (int i=0; i < cord_num; i++) {