mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-01-03 10:06:09 +01:00
OpenMP
This commit is contained in:
parent
88e8404b2a
commit
3ea90bc4a5
@ -5969,13 +5969,13 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
|
|||||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
|
const int64_t size_e = walk_num*(cord_num+1)*elec_num*elec_num;
|
||||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||||
|
|
||||||
#pragma omp parallel copyout(tmp_c [0:size_tmp_c]) copyin(een_rescaled_e[0:size_e], een_rescaled_n[0:size_n])
|
for (int64_t i=0 ; i<size_tmp_c ; ++i)
|
||||||
{
|
|
||||||
#pragma omp loop independent gang worker vector
|
|
||||||
for (int64_t i=0 ; i<size_tmp_c ; ++i)
|
|
||||||
tmp_c[i] = 0.;
|
tmp_c[i] = 0.;
|
||||||
|
|
||||||
#pragma omp loop independent gang worker vector collapse(5)
|
#pragma omp target data map(to:een_rescaled_e[0:size_e], \
|
||||||
|
een_rescaled_n[0:size_n]) \
|
||||||
|
map(tofrom:tmp_c[0:size_tmp_c])
|
||||||
|
#pragma omp target teams distribute parallel for collapse(5)
|
||||||
for (int nw=0; nw < walk_num; ++nw) {
|
for (int nw=0; nw < walk_num; ++nw) {
|
||||||
for (int i=0; i<cord_num; ++i){
|
for (int i=0; i<cord_num; ++i){
|
||||||
|
|
||||||
@ -5998,7 +5998,6 @@ qmckl_compute_tmp_c_omp_offload (const qmckl_context context,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return QMCKL_SUCCESS;
|
return QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -6314,8 +6313,6 @@ qmckl_compute_dtmp_c_hpc (const qmckl_context context,
|
|||||||
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
const int64_t bf = elec_num*nucl_num*(cord_num+1);
|
||||||
const int64_t cf = elec_num*4*nucl_num*(cord_num+1);
|
const int64_t cf = elec_num*4*nucl_num*(cord_num+1);
|
||||||
|
|
||||||
printf("COUCOU\n");
|
|
||||||
|
|
||||||
#ifdef HAVE_OPENMP
|
#ifdef HAVE_OPENMP
|
||||||
#pragma omp parallel for collapse(2)
|
#pragma omp parallel for collapse(2)
|
||||||
#endif
|
#endif
|
||||||
@ -6398,10 +6395,6 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
|||||||
|
|
||||||
#pragma acc parallel copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
#pragma acc parallel copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
||||||
{
|
{
|
||||||
#pragma acc loop independent gang worker vector
|
|
||||||
for (int64_t i=0 ; i<size_dtmp_c ; ++i)
|
|
||||||
dtmp_c[i] = 0.;
|
|
||||||
|
|
||||||
#pragma loop independent gang worker vector collapse(6)
|
#pragma loop independent gang worker vector collapse(6)
|
||||||
for (int nw=0; nw < walk_num; nw++) {
|
for (int nw=0; nw < walk_num; nw++) {
|
||||||
for (int i=0; i < cord_num; i++) {
|
for (int i=0; i < cord_num; i++) {
|
||||||
@ -6413,7 +6406,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
|||||||
for(int m=0; m<elec_num; m++) {
|
for(int m=0; m<elec_num; m++) {
|
||||||
|
|
||||||
// Single reduction
|
// Single reduction
|
||||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] = 0;
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] = 0.;
|
||||||
for(int n=0; n<elec_num; n++){
|
for(int n=0; n<elec_num; n++){
|
||||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] =
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] =
|
||||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] +
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] +
|
||||||
@ -6499,13 +6492,11 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
|
|||||||
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
const int64_t size_n = walk_num*(cord_num+1)*nucl_num*elec_num;
|
||||||
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
|
const int64_t size_e = walk_num*(cord_num+1)*elec_num*4*elec_num;
|
||||||
|
|
||||||
#pragma omp parallel copyout(dtmp_c [0:size_dtmp_c]) copyin(een_rescaled_e_deriv_e[0:size_e], een_rescaled_n[0:size_n])
|
|
||||||
{
|
|
||||||
#pragma omp target
|
|
||||||
for (int64_t i=0 ; i<size_dtmp_c ; ++i)
|
|
||||||
dtmp_c[i] = 0.;
|
|
||||||
|
|
||||||
#pragma loop independent gang worker vector collapse(6)
|
#pragma omp target data map(to:een_rescaled_e_deriv_e[0:size_e], \
|
||||||
|
een_rescaled_n[0:size_n]), \
|
||||||
|
map(tofrom:dtmp_c[0:size_dtmp_c])
|
||||||
|
#pragma omp target teams distribute parallel for collapse(6)
|
||||||
for (int nw=0; nw < walk_num; nw++) {
|
for (int nw=0; nw < walk_num; nw++) {
|
||||||
for (int i=0; i < cord_num; i++) {
|
for (int i=0; i < cord_num; i++) {
|
||||||
|
|
||||||
@ -6516,7 +6507,7 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
|
|||||||
for(int m=0; m<elec_num; m++) {
|
for(int m=0; m<elec_num; m++) {
|
||||||
|
|
||||||
// Single reduction
|
// Single reduction
|
||||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] = 0;
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] = 0.;
|
||||||
for(int n=0; n<elec_num; n++){
|
for(int n=0; n<elec_num; n++){
|
||||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] =
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] =
|
||||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] +
|
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] +
|
||||||
@ -6529,7 +6520,6 @@ qmckl_exit_code qmckl_compute_dtmp_c_omp_offload (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return QMCKL_SUCCESS;
|
return QMCKL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user