1
0
mirror of https://github.com/TREX-CoE/qmckl.git synced 2024-12-22 20:36:01 +01:00

Configure cuBLAS with --enable-gpu and clean code

This commit is contained in:
Aurelien Delval 2022-04-08 11:11:15 +02:00
parent 54f60480fa
commit ad531dddf9
2 changed files with 40 additions and 92 deletions

View File

@ -304,9 +304,10 @@ AS_IF([test "$enable_gpu" = "openacc"], [
]) ])
# cuBLAS offloading # cuBLAS offloading
AC_ARG_WITH(cublas, [AS_HELP_STRING([--with-cublas],[Use cuBLAS-offloaded functions])], HAVE_CUBLAS_OFFLOAD=$withval, HAVE_CUBLAS_OFFLOAD=no) HAVE_OPENACC_OFFLOAD="no"
AS_IF([test "$HAVE_CUBLAS_OFFLOAD" = "yes"], [ AS_IF([test "$enable_gpu" = "cublas"], [
AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines]) AC_DEFINE([HAVE_CUBLAS_OFFLOAD], [1], [If defined, activate cuBLAS-offloaded routines])
HAVE_CUBLAS_OFFLOAD="yes"
case $CC in case $CC in
*gcc*) *gcc*)

View File

@ -118,7 +118,6 @@ int main() {
#include "qmckl_jastrow_private_type.h" #include "qmckl_jastrow_private_type.h"
#ifdef HAVE_CUBLAS_OFFLOAD #ifdef HAVE_CUBLAS_OFFLOAD
#include <cuda_runtime_api.h>
#include "cublas_v2.h" #include "cublas_v2.h"
#endif #endif
@ -6229,30 +6228,6 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
{ {
qmckl_exit_code info; qmckl_exit_code info;
//Initialisation of cublas
cublasHandle_t handle;
if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
{
fprintf(stdout, "CUBLAS initialization failed!\n");
exit(EXIT_FAILURE);
}
qmckl_exit_code info;
//Initialisation of cublas
cublasHandle_t handle;
if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
{
fprintf(stdout, "CUBLAS initialization failed!\n");
exit(EXIT_FAILURE);
}
if (context == QMCKL_NULL_CONTEXT) { if (context == QMCKL_NULL_CONTEXT) {
return QMCKL_INVALID_CONTEXT; return QMCKL_INVALID_CONTEXT;
} }
@ -6269,6 +6244,14 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
return QMCKL_INVALID_ARG_4; return QMCKL_INVALID_ARG_4;
} }
//cuBLAS initialization
cublasHandle_t handle;
if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
{
fprintf(stdout, "CUBLAS initialization failed!\n");
exit(EXIT_FAILURE);
}
const double alpha = 1.0; const double alpha = 1.0;
const double beta = 0.0; const double beta = 0.0;
@ -6284,45 +6267,24 @@ qmckl_compute_tmp_c_cublas_offload (const qmckl_context context,
const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t bf = elec_num*nucl_num*(cord_num+1);
const int64_t cf = bf; const int64_t cf = bf;
#pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target enter data map(to:een_rescaled_e[0:elec_num*elec_num*(cord_num+1)*walk_num],een_rescaled_n[0:M*N*walk_num],tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
#pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c) #pragma omp target data use_device_ptr(een_rescaled_e,een_rescaled_n,tmp_c)
{ {
for (int nw=0; nw < walk_num; ++nw) { for (int nw=0; nw < walk_num; ++nw) {
int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
&(een_rescaled_e[nw*(cord_num+1)]), \ &(een_rescaled_e[nw*(cord_num+1)]),
LDA, af, \ LDA, af,
&(een_rescaled_n[bf*nw]), \ &(een_rescaled_n[bf*nw]),
LDB, 0, \ LDB, 0,
&beta, \ &beta,
&(tmp_c[nw*cord_num]), \ &(tmp_c[nw*cord_num]),
LDC, cf, cord_num); LDC, cf, cord_num);
//Manage cublas ERROR
if(cublasError != CUBLAS_STATUS_SUCCESS){
printf("CUBLAS ERROR %d", cublasError);
info = QMCKL_FAILURE;
return info;
}else{
info = QMCKL_SUCCESS;
}
} }
} }
cudaDeviceSynchronize();
cublasDestroy(handle);
#pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target exit data map(from:tmp_c[0:elec_num*nucl_num*(cord_num+1)*cord_num*walk_num])
cublasDestroy(handle);
return info; return info;
} }
#endif #endif
@ -6801,15 +6763,6 @@ qmckl_compute_dtmp_c_cublas_offload (
const double* een_rescaled_n, const double* een_rescaled_n,
double* const dtmp_c ) { double* const dtmp_c ) {
cublasHandle_t handle;
if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
{
fprintf(stdout, "CUBLAS initialization failed!\n");
exit(EXIT_FAILURE);
}
if (context == QMCKL_NULL_CONTEXT) { if (context == QMCKL_NULL_CONTEXT) {
return QMCKL_INVALID_CONTEXT; return QMCKL_INVALID_CONTEXT;
} }
@ -6832,6 +6785,14 @@ qmckl_compute_dtmp_c_cublas_offload (
qmckl_exit_code info = QMCKL_SUCCESS; qmckl_exit_code info = QMCKL_SUCCESS;
//cuBLAS initialization
cublasHandle_t handle;
if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS)
{
fprintf(stdout, "CUBLAS initialization failed!\n");
exit(EXIT_FAILURE);
}
const double alpha = 1.0; const double alpha = 1.0;
const double beta = 0.0; const double beta = 0.0;
@ -6847,38 +6808,24 @@ qmckl_compute_dtmp_c_cublas_offload (
const int64_t bf = elec_num*nucl_num*(cord_num+1); const int64_t bf = elec_num*nucl_num*(cord_num+1);
const int64_t cf = elec_num*4*nucl_num*(cord_num+1); const int64_t cf = elec_num*4*nucl_num*(cord_num+1);
#pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num]) #pragma omp target enter data map(to:een_rescaled_e_deriv_e[0:elec_num*4*elec_num*(cord_num+1)*walk_num], een_rescaled_n[0:elec_num*nucl_num*(cord_num+1)*walk_num], dtmp_c[0:elec_num*4*nucl_num*(cord_num+1)*cord_num*walk_num])
#pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c) #pragma omp target data use_device_ptr(een_rescaled_e_deriv_e, een_rescaled_n, dtmp_c)
{ {
for (int64_t nw=0; nw < walk_num; ++nw) { for (int64_t nw=0; nw < walk_num; ++nw) {
//Manage CUBLAS ERRORS int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha,
&(een_rescaled_e_deriv_e[(nw*(cord_num+1))]),
int cublasError = cublasDgemmStridedBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, M, N, K, &alpha, \ LDA, af,
&(een_rescaled_e_deriv_e[(nw*(cord_num+1))]), \ &(een_rescaled_n[bf*nw]), LDB, 0,
LDA, af, \ &beta,
&(een_rescaled_n[bf*nw]), \ &(dtmp_c[(nw*cord_num)]),
LDB, 0, \
&beta, \
&(dtmp_c[(nw*cord_num)]), \
LDC, cf, cord_num); LDC, cf, cord_num);
if(cublasError != CUBLAS_STATUS_SUCCESS){
printf("CUBLAS ERROR %d", cublasError);
info = QMCKL_FAILURE;
return info;
}else{
info = QMCKL_SUCCESS;
}
//}
}
} }
cudaDeviceSynchronize(); }
#pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num])
cublasDestroy(handle); cublasDestroy(handle);
#pragma omp target exit data map(from:dtmp_c[0:cf*cord_num*walk_num])
return info; return info;
} }
#endif #endif