mirror of
https://github.com/TREX-CoE/qmckl.git
synced 2025-04-29 20:04:50 +02:00
Fixed naive GPU kernels and ignored variable issue
This commit is contained in:
parent
bc43113b6f
commit
eb71a752f5
114
org/ao_grid.f90
114
org/ao_grid.f90
@ -1,114 +0,0 @@
|
||||
subroutine qmckl_check_error(rc, message)
|
||||
use qmckl
|
||||
implicit none
|
||||
integer(qmckl_exit_code), intent(in) :: rc
|
||||
character(len=*) , intent(in) :: message
|
||||
character(len=128) :: str_buffer
|
||||
if (rc /= QMCKL_SUCCESS) then
|
||||
print *, message
|
||||
call qmckl_string_of_error(rc, str_buffer)
|
||||
print *, str_buffer
|
||||
call exit(rc)
|
||||
end if
|
||||
end subroutine qmckl_check_error
|
||||
|
||||
program ao_grid
|
||||
use qmckl
|
||||
implicit none
|
||||
|
||||
integer(qmckl_context) :: qmckl_ctx ! QMCkl context
|
||||
integer(qmckl_exit_code) :: rc ! Exit code of QMCkl functions
|
||||
|
||||
character(len=128) :: trexio_filename
|
||||
character(len=128) :: str_buffer
|
||||
integer :: ao_id
|
||||
integer :: point_num_x
|
||||
|
||||
integer(c_int64_t) :: nucl_num
|
||||
double precision, allocatable :: nucl_coord(:,:)
|
||||
|
||||
integer(c_int64_t) :: point_num
|
||||
integer(c_int64_t) :: ao_num
|
||||
integer(c_int64_t) :: ipoint, i, j, k
|
||||
double precision :: x, y, z, dr(3)
|
||||
double precision :: rmin(3), rmax(3)
|
||||
double precision, allocatable :: points(:,:)
|
||||
double precision, allocatable :: ao_vgl(:,:,:)
|
||||
|
||||
if (iargc() /= 3) then
|
||||
print *, 'Syntax: ao_grid <trexio_file> <AO_id> <point_num>'
|
||||
call exit(-1)
|
||||
end if
|
||||
call getarg(1, trexio_filename)
|
||||
call getarg(2, str_buffer)
|
||||
read(str_buffer, *) ao_id
|
||||
call getarg(3, str_buffer)
|
||||
read(str_buffer, *) point_num_x
|
||||
|
||||
if (point_num_x < 0 .or. point_num_x > 300) then
|
||||
print *, 'Error: 0 < point_num < 300'
|
||||
call exit(-1)
|
||||
end if
|
||||
|
||||
qmckl_ctx = qmckl_context_create()
|
||||
rc = qmckl_trexio_read(qmckl_ctx, trexio_filename, 1_8*len(trim(trexio_filename)))
|
||||
call qmckl_check_error(rc, 'Read TREXIO')
|
||||
|
||||
rc = qmckl_get_ao_basis_ao_num(qmckl_ctx, ao_num)
|
||||
call qmckl_check_error(rc, 'Getting ao_num')
|
||||
|
||||
if (ao_id < 0 .or. ao_id > ao_num) then
|
||||
print *, 'Error: 0 < ao_id < ', ao_num
|
||||
call exit(-1)
|
||||
end if
|
||||
|
||||
rc = qmckl_get_nucleus_num(qmckl_ctx, nucl_num)
|
||||
call qmckl_check_error(rc, 'Get nucleus num')
|
||||
|
||||
allocate( nucl_coord(3, nucl_num) )
|
||||
rc = qmckl_get_nucleus_coord(qmckl_ctx, 'N', nucl_coord, 3_8*nucl_num)
|
||||
call qmckl_check_error(rc, 'Get nucleus coord')
|
||||
|
||||
rmin(1) = minval( nucl_coord(1,:) ) - 5.d0
|
||||
rmin(2) = minval( nucl_coord(2,:) ) - 5.d0
|
||||
rmin(3) = minval( nucl_coord(3,:) ) - 5.d0
|
||||
|
||||
rmax(1) = maxval( nucl_coord(1,:) ) + 5.d0
|
||||
rmax(2) = maxval( nucl_coord(2,:) ) + 5.d0
|
||||
rmax(3) = maxval( nucl_coord(3,:) ) + 5.d0
|
||||
|
||||
dr(1:3) = (rmax(1:3) - rmin(1:3)) / dble(point_num_x-1)
|
||||
|
||||
point_num = point_num_x**3
|
||||
allocate( points(point_num, 3) )
|
||||
ipoint=0
|
||||
z = rmin(3)
|
||||
do k=1,point_num_x
|
||||
y = rmin(2)
|
||||
do j=1,point_num_x
|
||||
x = rmin(1)
|
||||
do i=1,point_num_x
|
||||
ipoint = ipoint+1
|
||||
points(ipoint,1) = x
|
||||
points(ipoint,2) = y
|
||||
points(ipoint,3) = z
|
||||
x = x + dr(1)
|
||||
end do
|
||||
y = y + dr(2)
|
||||
end do
|
||||
z = z + dr(3)
|
||||
end do
|
||||
|
||||
rc = qmckl_set_point(qmckl_ctx, 'T', points, point_num)
|
||||
call qmckl_check_error(rc, 'Setting points')
|
||||
|
||||
allocate( ao_vgl(ao_num, 5, point_num) )
|
||||
rc = qmckl_get_ao_basis_ao_vgl(qmckl_ctx, ao_vgl, ao_num*5_8*point_num)
|
||||
call qmckl_check_error(rc, 'Setting points')
|
||||
|
||||
do ipoint=1, point_num
|
||||
print '(3(F16.10,X),E20.10)', points(ipoint, 1:3), ao_vgl(ao_id,1,ipoint)
|
||||
end do
|
||||
|
||||
deallocate( nucl_coord, points, ao_vgl )
|
||||
end program ao_grid
|
@ -4889,57 +4889,65 @@ qmckl_exit_code qmckl_provide_tmp_c(qmckl_context context)
|
||||
}
|
||||
ctx->jastrow.tmp_c = tmp_c;
|
||||
}
|
||||
|
||||
/* Choose the correct compute function (depending on offload type) */
|
||||
bool default_compute = true;
|
||||
switch(ctx->jastrow.offload_type) {
|
||||
case OFFLOAD_OPENACC:
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
rc =
|
||||
qmckl_compute_tmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
#elif
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
if(ctx->jastrow.offload_type == OFFLOAD_OPENACC) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_tmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
default_compute = false;
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
case OFFLOAD_CUBLAS:
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
rc =
|
||||
qmckl_compute_tmp_c_cublas_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
#elif
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
if(ctx->jastrow.offload_type == OFFLOAD_CUBLAS) {
|
||||
qmckl_exit_code rc =
|
||||
qmckl_compute_tmp_c_cublas_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
default_compute = false;
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
rc = qmckl_compute_tmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.tmp_c);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
ctx->jastrow.tmp_c_date = ctx->date;
|
||||
}
|
||||
@ -4980,15 +4988,61 @@ qmckl_exit_code qmckl_provide_dtmp_c(qmckl_context context)
|
||||
ctx->jastrow.dtmp_c = dtmp_c;
|
||||
}
|
||||
|
||||
switch(ctx->jastrow.offload_type) {
|
||||
case OFFLOAD_OPENACC:
|
||||
#ifdef HAVE_OPENACC_OFFLOAD
|
||||
rc = qmckl_compute_dtmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#elif
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#endif
|
||||
break;
|
||||
case OFFLOAD_CUBLAS:
|
||||
#ifdef HAVE_CUBLAS_OFFLOAD
|
||||
rc = qmckl_compute_dtmp_c_acc_offload(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#elif
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
break;
|
||||
}
|
||||
|
||||
rc = qmckl_compute_dtmp_c(context,
|
||||
ctx->jastrow.cord_num,
|
||||
ctx->electron.num,
|
||||
ctx->nucleus.num,
|
||||
ctx->electron.walk_num,
|
||||
ctx->jastrow.een_rescaled_e_deriv_e,
|
||||
ctx->jastrow.een_rescaled_n,
|
||||
ctx->jastrow.dtmp_c);
|
||||
if (rc != QMCKL_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
@ -5617,7 +5671,6 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||
const double* een_rescaled_n,
|
||||
double* const tmp_c ) {
|
||||
|
||||
|
||||
if (context == QMCKL_NULL_CONTEXT) {
|
||||
return QMCKL_INVALID_CONTEXT;
|
||||
}
|
||||
@ -5649,8 +5702,8 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||
int stride_j_n = stride_k_n * nucl_num;
|
||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||
|
||||
//#pragma acc parallel
|
||||
//#pragma acc loop independent gang worker vector collapse(5)
|
||||
#pragma acc parallel
|
||||
#pragma acc loop independent gang worker vector collapse(5)
|
||||
for (int nw=0; nw < walk_num; ++nw) {
|
||||
for (int i=0; i<cord_num; ++i){
|
||||
|
||||
@ -5660,9 +5713,10 @@ qmckl_exit_code qmckl_compute_tmp_c_acc_offload (
|
||||
for (int l=0; l<elec_num; l++) {
|
||||
|
||||
// Single reduction
|
||||
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] = 0;
|
||||
for (int m=0; m<elec_num; m++) {
|
||||
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] =
|
||||
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c, nw*stride_nw_c];
|
||||
tmp_c[l + k*stride_k_c + j*stride_j_c + i*stride_i_c + nw*stride_nw_c] +
|
||||
een_rescaled_e[l + m*stride_m_e + i*stride_i_e + nw*stride_nw_e] *
|
||||
een_rescaled_n[m + k*stride_k_n + j*stride_j_n + nw*stride_nw_n];
|
||||
}
|
||||
@ -6110,8 +6164,8 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||
int stride_nw_n = stride_j_n * (cord_num+1);
|
||||
|
||||
|
||||
//#pragma acc parallel
|
||||
//#pragma loop independent gang worker vector collapse(6)
|
||||
#pragma acc parallel
|
||||
#pragma loop independent gang worker vector collapse(6)
|
||||
for (int nw=0; nw < walk_num; nw++) {
|
||||
for (int i=0; i < cord_num; i++) {
|
||||
|
||||
@ -6122,12 +6176,12 @@ qmckl_exit_code qmckl_compute_dtmp_c_acc_offload (
|
||||
for(int m=0; m<elec_num; m++) {
|
||||
|
||||
// Single reduction
|
||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] = 0;
|
||||
for(int n=0; n<elec_num; n++){
|
||||
// TODO Fix indexing issues
|
||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] =
|
||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d];
|
||||
//een_rescaled_e_deriv_e[m + l * stride_l_e + n * stride_n_e + i * stride_i_e + nw * stride_nw_e] *
|
||||
//een_rescaled_n[n * + k * stride_k_n + j * stride_j_n + nw * stride_nw_n];
|
||||
dtmp_c[m + l * stride_l_d + k * stride_k_d + j * stride_j_d + i * stride_i_d + nw * stride_nw_d] +
|
||||
een_rescaled_e_deriv_e[m + l * stride_l_e + n * stride_n_e + i * stride_i_e + nw * stride_nw_e] *
|
||||
een_rescaled_n[n + k * stride_k_n + j * stride_j_n + nw * stride_nw_n];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user