From eb71a752f5bfc8a3afa04d0f0995078163bd35c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Delval?= Date: Tue, 5 Apr 2022 14:28:35 +0200 Subject: [PATCH] Fixed naive GPU kernels and ignored variable issue --- org/ao_grid.f90 | 114 -------------------------- org/qmckl_jastrow.org | 186 +++++++++++++++++++++++++++--------------- 2 files changed, 120 insertions(+), 180 deletions(-) delete mode 100644 org/ao_grid.f90 diff --git a/org/ao_grid.f90 b/org/ao_grid.f90 deleted file mode 100644 index 685313f..0000000 --- a/org/ao_grid.f90 +++ /dev/null @@ -1,114 +0,0 @@ -subroutine qmckl_check_error(rc, message) - use qmckl - implicit none - integer(qmckl_exit_code), intent(in) :: rc - character(len=*) , intent(in) :: message - character(len=128) :: str_buffer - if (rc /= QMCKL_SUCCESS) then - print *, message - call qmckl_string_of_error(rc, str_buffer) - print *, str_buffer - call exit(rc) - end if -end subroutine qmckl_check_error - -program ao_grid - use qmckl - implicit none - - integer(qmckl_context) :: qmckl_ctx ! QMCkl context - integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions - - character(len=128) :: trexio_filename - character(len=128) :: str_buffer - integer :: ao_id - integer :: point_num_x - - integer(c_int64_t) :: nucl_num - double precision, allocatable :: nucl_coord(:,:) - - integer(c_int64_t) :: point_num - integer(c_int64_t) :: ao_num - integer(c_int64_t) :: ipoint, i, j, k - double precision :: x, y, z, dr(3) - double precision :: rmin(3), rmax(3) - double precision, allocatable :: points(:,:) - double precision, allocatable :: ao_vgl(:,:,:) - -if (iargc() /= 3) then - print *, 'Syntax: ao_grid ' - call exit(-1) -end if -call getarg(1, trexio_filename) -call getarg(2, str_buffer) -read(str_buffer, *) ao_id -call getarg(3, str_buffer) -read(str_buffer, *) point_num_x - -if (point_num_x < 0 .or. point_num_x > 300) then - print *, 'Error: 0 < point_num < 300' - call exit(-1) -end if - -qmckl_ctx = qmckl_context_create() -rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename))) -call qmckl_check_error(rc, 'Read TREXIO') - -rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num) -call qmckl_check_error(rc, 'Getting ao_num') - -if (ao_id < 0 .or. ao_id > ao_num) then - print *, 'Error: 0 < ao_id < ', ao_num - call exit(-1) -end if - -rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num) -call qmckl_check_error(rc, 'Get nucleus num') - -allocate( nucl_coord(3, nucl_num) ) -rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num) -call qmckl_check_error(rc, 'Get nucleus coord') - -rmin(1) = minval( nucl_coord(1,:) ) - 5.d0 -rmin(2) = minval( nucl_coord(2,:) ) - 5.d0 -rmin(3) = minval( nucl_coord(3,:) ) - 5.d0 - -rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0 -rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0 -rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0 - -dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1) - -point_num = point_num_x**3 -allocate( points(point_num, 3) ) -ipoint=0 -z = rmin(3) -do k=1,point_num_x - y = rmin(2) - do j=1,point_num_x - x = rmin(1) - do i=1,point_num_x - ipoint = ipoint+1 - points(ipoint,1) = x - points(ipoint,2) = y - points(ipoint,3) = z - x = x + dr(1) - end do - y = y + dr(2) - end do - z = z + dr(3) -end do - -rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num) -call qmckl_check_error(rc, 'Setting points') - -allocate( ao_vgl(ao_num, 5, point_num) ) -rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num) -call qmckl_check_error(rc, 'Setting points') - -do ipoint=1, point_num - print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint) -end do - -deallocate( nucl_coord, points, ao_vgl ) -end program ao_grid diff --git a/org/qmckl_jastrow.org b/org/qmckl_jastrow.org index 46bb5da..e69088c 100644 --- a/org/qmckl_jastrow.org +++ b/org/qmckl_jastrow.org @@ -4889,57 +4889,65 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context) } ctx->jastrow.tmp_c = tmp_c; } + /* Choose the correct compute function (depending on offload type) */ - bool default_compute = true; + switch(ctx->jastrow.offload_type) { + case OFFLOAD_OPENACC: + #ifdef HAVE_OPENACC_OFFLOAD + rc = + qmckl_compute_tmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #elif + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); - #ifdef HAVE_OPENACC_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) { - qmckl_exit_code rc = - qmckl_compute_tmp_c_acc_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } + #endif + break; + case OFFLOAD_CUBLAS: + #ifdef HAVE_CUBLAS_OFFLOAD + rc = + qmckl_compute_tmp_c_cublas_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #elif + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + #endif + break; + default: + rc = qmckl_compute_tmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.tmp_c); + break; } - #endif - - #ifdef HAVE_CUBLAS_OFFLOAD - if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) { - qmckl_exit_code rc = - qmckl_compute_tmp_c_cublas_offload(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - default_compute = false; - if (rc != QMCKL_SUCCESS) { - return rc; - } - } - #endif - - rc = qmckl_compute_tmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.tmp_c); - if (rc != QMCKL_SUCCESS) { - return rc; - } - ctx->jastrow.tmp_c_date = ctx->date; } @@ -4980,15 +4988,61 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context) ctx->jastrow.dtmp_c = dtmp_c; } + switch(ctx->jastrow.offload_type) { + case OFFLOAD_OPENACC: + #ifdef HAVE_OPENACC_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #elif + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #endif + break; + case OFFLOAD_CUBLAS: + #ifdef HAVE_CUBLAS_OFFLOAD + rc = qmckl_compute_dtmp_c_acc_offload(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #elif + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + #endif + break; + default: + rc = qmckl_compute_dtmp_c(context, + ctx->jastrow.cord_num, + ctx->electron.num, + ctx->nucleus.num, + ctx->electron.walk_num, + ctx->jastrow.een_rescaled_e_deriv_e, + ctx->jastrow.een_rescaled_n, + ctx->jastrow.dtmp_c); + break; + } - rc = qmckl_compute_dtmp_c(context, - ctx->jastrow.cord_num, - ctx->electron.num, - ctx->nucleus.num, - ctx->electron.walk_num, - ctx->jastrow.een_rescaled_e_deriv_e, - ctx->jastrow.een_rescaled_n, - ctx->jastrow.dtmp_c); if (rc != QMCKL_SUCCESS) { return rc; } @@ -5617,7 +5671,6 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( const double* een_rescaled_n, double* const tmp_c ) { - if (context == QMCKL_NULL_CONTEXT) { return QMCKL_INVALID_CONTEXT; } @@ -5649,8 +5702,8 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload ( int stride_j_n = stride_k_n * nucl_num; int stride_nw_n = stride_j_n * (cord_num+1); - //#pragma acc parallel - //#pragma acc loop independent gang worker vector collapse(5) + #pragma acc parallel + #pragma acc loop independent gang worker vector collapse(5) for (int nw=0; nw < walk_num; ++nw) { for (int i=0; i