mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-11-04 05:03:50 +01:00
Added Nvidia module
This commit is contained in:
parent
fa6d141949
commit
caec518384
@ -384,17 +384,17 @@ subroutine update_tau_space(nO,nV,h_t1,t1,t2,tau)
|
|||||||
!$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas) &
|
!$OMP SHARED(nO,nV,tau,t2,t1,h_t1,stream,blas) &
|
||||||
!$OMP PRIVATE(i,j,a,b) &
|
!$OMP PRIVATE(i,j,a,b) &
|
||||||
!$OMP DEFAULT(NONE)
|
!$OMP DEFAULT(NONE)
|
||||||
do j=1,nO
|
!$OMP DO
|
||||||
!$OMP DO
|
do b=1,nV
|
||||||
do b=1,nV
|
call gpu_set_stream(blas,stream(b))
|
||||||
call gpu_set_stream(blas,stream(b))
|
do j=1,nO
|
||||||
call gpu_dgeam_c(blas%c, 'N', 'N', nO*1_8, nV*1_8, &
|
call gpu_dgeam_c(blas%c, 'N', 'N', nO*1_8, nV*1_8, &
|
||||||
1.d0, c_loc(t2%f(1,j,1,b)), nO*nO*1_8, &
|
1.d0, c_loc(t2%f(1,j,1,b)), nO*nO*1_8, &
|
||||||
h_t1(j,b), t1%c, nO*1_8, &
|
h_t1(j,b), t1%c, nO*1_8, &
|
||||||
c_loc(tau%f(1,j,1,b)), nO*nO*1_8)
|
c_loc(tau%f(1,j,1,b)), nO*nO*1_8)
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
|
||||||
enddo
|
enddo
|
||||||
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
call gpu_synchronize()
|
call gpu_synchronize()
|
||||||
|
@ -120,7 +120,7 @@ module gpu
|
|||||||
|
|
||||||
subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot')
|
subroutine gpu_ddot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_ddot')
|
||||||
import
|
import
|
||||||
type(c_ptr), intent(in), value :: handle
|
type(c_ptr), intent(in) :: handle
|
||||||
integer(c_int64_t), value :: n, incx, incy
|
integer(c_int64_t), value :: n, incx, incy
|
||||||
type(c_ptr), intent(in), value :: dx, dy
|
type(c_ptr), intent(in), value :: dx, dy
|
||||||
real(c_double), intent(out) :: res
|
real(c_double), intent(out) :: res
|
||||||
@ -128,7 +128,7 @@ module gpu
|
|||||||
|
|
||||||
subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot')
|
subroutine gpu_sdot_c(handle, n, dx, incx, dy, incy, res) bind(C, name='gpu_sdot')
|
||||||
import
|
import
|
||||||
type(c_ptr), intent(in), value :: handle
|
type(c_ptr), intent(in) :: handle
|
||||||
integer(c_int64_t), value :: n, incx, incy
|
integer(c_int64_t), value :: n, incx, incy
|
||||||
type(c_ptr), intent(in), value :: dx, dy
|
type(c_ptr), intent(in), value :: dx, dy
|
||||||
real(c_float), intent(out) :: res
|
real(c_float), intent(out) :: res
|
||||||
@ -137,7 +137,7 @@ module gpu
|
|||||||
subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
|
subroutine gpu_dgeam_c(handle, transa, transb, m, n, alpha, a, lda, beta, &
|
||||||
b, ldb, c, ldc) bind(C, name='gpu_dgeam')
|
b, ldb, c, ldc) bind(C, name='gpu_dgeam')
|
||||||
import
|
import
|
||||||
type(c_ptr), intent(in), value :: handle
|
type(c_ptr), intent(in) :: handle
|
||||||
character(c_char), intent(in), value :: transa, transb
|
character(c_char), intent(in), value :: transa, transb
|
||||||
integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
|
integer(c_int64_t), intent(in), value :: m, n, lda, ldb, ldc
|
||||||
real(c_double), intent(in), value :: alpha, beta
|
real(c_double), intent(in), value :: alpha, beta
|
||||||
|
1
src/gpu_nvidia/LIB
Normal file
1
src/gpu_nvidia/LIB
Normal file
@ -0,0 +1 @@
|
|||||||
|
-lcudart -lcublas -lcublasLt
|
1
src/gpu_nvidia/NEED
Normal file
1
src/gpu_nvidia/NEED
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
5
src/gpu_nvidia/README.rst
Normal file
5
src/gpu_nvidia/README.rst
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
==========
|
||||||
|
gpu_nvidia
|
||||||
|
==========
|
||||||
|
|
||||||
|
Nvidia implementation of GPU routines. Uses CUDA and CUBLAS libraries.
|
327
src/gpu_nvidia/gpu.c
Normal file
327
src/gpu_nvidia/gpu.c
Normal file
@ -0,0 +1,327 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include <cublas_v2.h>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
|
|
||||||
|
/* Generic functions */
|
||||||
|
|
||||||
|
int gpu_ndevices() {
|
||||||
|
int ngpus;
|
||||||
|
cudaGetDeviceCount(&ngpus);
|
||||||
|
return ngpus;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_set_device(int32_t igpu) {
|
||||||
|
cudaSetDevice(igpu);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Allocation functions */
|
||||||
|
|
||||||
|
void gpu_allocate(void** ptr, const int64_t size) {
|
||||||
|
size_t free, total;
|
||||||
|
cudaError_t rc = cudaMemGetInfo( &free, &total );
|
||||||
|
if (rc != cudaSuccess) {
|
||||||
|
free = INT64_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Use managed memory if it does not fit on the GPU */
|
||||||
|
if (size < free && size < total/2) {
|
||||||
|
// rc= cudaMalloc(ptr, size);
|
||||||
|
rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal);
|
||||||
|
} else {
|
||||||
|
rc = cudaMallocManaged(ptr, size, cudaMemAttachGlobal);
|
||||||
|
}
|
||||||
|
assert (rc == cudaSuccess);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_deallocate(void** ptr) {
|
||||||
|
assert (*ptr != NULL);
|
||||||
|
cudaFree(*ptr);
|
||||||
|
*ptr = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Memory transfer functions */
|
||||||
|
|
||||||
|
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
|
||||||
|
cudaMemcpy (gpu_ptr, cpu_ptr, n, cudaMemcpyHostToDevice);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
|
||||||
|
cudaMemcpy (cpu_ptr, gpu_ptr, n, cudaMemcpyDeviceToHost);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
|
||||||
|
cudaMemcpy (gpu_ptr_dest, gpu_ptr_src, n, cudaMemcpyDeviceToDevice);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Streams */
|
||||||
|
|
||||||
|
void gpu_stream_create(void** ptr) {
|
||||||
|
cudaStream_t stream;
|
||||||
|
cudaError_t rc = cudaStreamCreate(&stream);
|
||||||
|
assert (rc == cudaSuccess);
|
||||||
|
*ptr = (void*) stream;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_stream_destroy(void** ptr) {
|
||||||
|
assert (*ptr != NULL);
|
||||||
|
cudaError_t rc = cudaStreamDestroy( (cudaStream_t) *ptr);
|
||||||
|
assert (rc == cudaSuccess);
|
||||||
|
*ptr = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_set_stream(void** handle, void** stream) {
|
||||||
|
cublasSetStream( (cublasHandle_t) *handle, (cudaStream_t) *stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gpu_synchronize() {
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* BLAS functions */
|
||||||
|
|
||||||
|
void gpu_blas_create(void** handle) {
|
||||||
|
cublasHandle_t cublas_handle;
|
||||||
|
cublasStatus_t rc = cublasCreate(&cublas_handle);
|
||||||
|
assert (rc == CUBLAS_STATUS_SUCCESS);
|
||||||
|
*handle = (void*) cublas_handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_blas_destroy(void** handle) {
|
||||||
|
assert (*handle != NULL);
|
||||||
|
cublasStatus_t rc = cublasDestroy( (cublasHandle_t) *handle);
|
||||||
|
assert (rc == CUBLAS_STATUS_SUCCESS);
|
||||||
|
*handle = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_ddot(void** handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t n_, incx_, incy_;
|
||||||
|
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
incx_ = (int32_t) incx;
|
||||||
|
incy_ = (int32_t) incy;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) incx_ == incx);
|
||||||
|
assert ( (int64_t) incy_ == incy);
|
||||||
|
|
||||||
|
cublasDdot((cublasHandle_t) *handle, n_, x, incx_, y, incy_, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sdot(void** handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t n_, incx_, incy_;
|
||||||
|
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
incx_ = (int32_t) incx;
|
||||||
|
incy_ = (int32_t) incy;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) incx_ == incx);
|
||||||
|
assert ( (int64_t) incy_ == incy);
|
||||||
|
|
||||||
|
cublasSdot((cublasHandle_t) *handle, n_, x, incx_, y, incy_, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t n, const double alpha,
|
||||||
|
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) {
|
||||||
|
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t m_, n_, lda_, incx_, incy_;
|
||||||
|
|
||||||
|
m_ = (int32_t) m;
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
lda_ = (int32_t) lda;
|
||||||
|
incx_ = (int32_t) incx;
|
||||||
|
incy_ = (int32_t) incy;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) m_ == m );
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) lda_ == lda );
|
||||||
|
assert ( (int64_t) incx_ == incx);
|
||||||
|
assert ( (int64_t) incy_ == incy);
|
||||||
|
|
||||||
|
cublasOperation_t transa_ = CUBLAS_OP_N;
|
||||||
|
if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T;
|
||||||
|
|
||||||
|
cublasDgemv((cublasHandle_t) *handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t n, const float alpha,
|
||||||
|
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) {
|
||||||
|
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t m_, n_, lda_, incx_, incy_;
|
||||||
|
|
||||||
|
m_ = (int32_t) m;
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
lda_ = (int32_t) lda;
|
||||||
|
incx_ = (int32_t) incx;
|
||||||
|
incy_ = (int32_t) incy;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) m_ == m );
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) lda_ == lda );
|
||||||
|
assert ( (int64_t) incx_ == incx);
|
||||||
|
assert ( (int64_t) incy_ == incy);
|
||||||
|
|
||||||
|
cublasOperation_t transa_ = CUBLAS_OP_N;
|
||||||
|
if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T;
|
||||||
|
|
||||||
|
cublasSgemv((cublasHandle_t) *handle, transa_, m_, n_, &alpha, a, lda_, x, incx_, &beta, y, incy_);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_dgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha,
|
||||||
|
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) {
|
||||||
|
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t m_, n_, k_, lda_, ldb_, ldc_;
|
||||||
|
|
||||||
|
m_ = (int32_t) m;
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
k_ = (int32_t) k;
|
||||||
|
lda_ = (int32_t) lda;
|
||||||
|
ldb_ = (int32_t) ldb;
|
||||||
|
ldc_ = (int32_t) ldc;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) m_ == m );
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) k_ == k );
|
||||||
|
assert ( (int64_t) lda_ == lda);
|
||||||
|
assert ( (int64_t) ldb_ == ldb);
|
||||||
|
assert ( (int64_t) ldc_ == ldc);
|
||||||
|
|
||||||
|
cublasOperation_t transa_ = CUBLAS_OP_N;
|
||||||
|
cublasOperation_t transb_ = CUBLAS_OP_N;
|
||||||
|
if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T;
|
||||||
|
if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T;
|
||||||
|
|
||||||
|
cublasDgemm((cublasHandle_t) *handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha,
|
||||||
|
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) {
|
||||||
|
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t m_, n_, k_, lda_, ldb_, ldc_;
|
||||||
|
|
||||||
|
m_ = (int32_t) m;
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
k_ = (int32_t) k;
|
||||||
|
lda_ = (int32_t) lda;
|
||||||
|
ldb_ = (int32_t) ldb;
|
||||||
|
ldc_ = (int32_t) ldc;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) m_ == m );
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) k_ == k );
|
||||||
|
assert ( (int64_t) lda_ == lda);
|
||||||
|
assert ( (int64_t) ldb_ == ldb);
|
||||||
|
assert ( (int64_t) ldc_ == ldc);
|
||||||
|
|
||||||
|
cublasOperation_t transa_ = CUBLAS_OP_N;
|
||||||
|
cublasOperation_t transb_ = CUBLAS_OP_N;
|
||||||
|
if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T;
|
||||||
|
if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T;
|
||||||
|
|
||||||
|
cublasSgemm((cublasHandle_t) *handle, transa_, transb_, m_, n_, k_, &alpha, a, lda_, b, ldb_, &beta, c, ldc_);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_dgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha,
|
||||||
|
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t m_, n_, lda_, ldb_, ldc_;
|
||||||
|
|
||||||
|
m_ = (int32_t) m;
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
lda_ = (int32_t) lda;
|
||||||
|
ldb_ = (int32_t) ldb;
|
||||||
|
ldc_ = (int32_t) ldc;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) m_ == m );
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) lda_ == lda);
|
||||||
|
assert ( (int64_t) ldb_ == ldb);
|
||||||
|
assert ( (int64_t) ldc_ == ldc);
|
||||||
|
|
||||||
|
cublasOperation_t transa_ = CUBLAS_OP_N;
|
||||||
|
cublasOperation_t transb_ = CUBLAS_OP_N;
|
||||||
|
if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T;
|
||||||
|
if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T;
|
||||||
|
|
||||||
|
cublasDgeam((cublasHandle_t) *handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void gpu_sgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha,
|
||||||
|
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
|
||||||
|
assert (*handle != NULL);
|
||||||
|
|
||||||
|
/* Convert to int32_t */
|
||||||
|
int32_t m_, n_, lda_, ldb_, ldc_;
|
||||||
|
|
||||||
|
m_ = (int32_t) m;
|
||||||
|
n_ = (int32_t) n;
|
||||||
|
lda_ = (int32_t) lda;
|
||||||
|
ldb_ = (int32_t) ldb;
|
||||||
|
ldc_ = (int32_t) ldc;
|
||||||
|
|
||||||
|
/* Check for integer overflows */
|
||||||
|
assert ( (int64_t) m_ == m );
|
||||||
|
assert ( (int64_t) n_ == n );
|
||||||
|
assert ( (int64_t) lda_ == lda);
|
||||||
|
assert ( (int64_t) ldb_ == ldb);
|
||||||
|
assert ( (int64_t) ldc_ == ldc);
|
||||||
|
|
||||||
|
cublasOperation_t transa_ = CUBLAS_OP_N;
|
||||||
|
cublasOperation_t transb_ = CUBLAS_OP_N;
|
||||||
|
if (transa == 'T' || transa == 't') transa_ = CUBLAS_OP_T;
|
||||||
|
if (transb == 'T' || transb == 't') transb_ = CUBLAS_OP_T;
|
||||||
|
|
||||||
|
cublasSgeam((cublasHandle_t) *handle, transa_, transb_, m_, n_, &alpha, a, lda_, &beta, b, ldb_, c, ldc_);
|
||||||
|
|
||||||
|
}
|
@ -56,7 +56,7 @@ void gpu_stream_destroy(void** ptr) {
|
|||||||
*ptr = NULL;
|
*ptr = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gpu_set_stream(void* handle, void* stream) {
|
void gpu_set_stream(void** handle, void** stream) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -79,8 +79,8 @@ void gpu_blas_destroy(void** handle) {
|
|||||||
|
|
||||||
double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy);
|
double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
|
void gpu_ddot(void** handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
|
||||||
assert (handle != NULL);
|
assert (*handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
int32_t n_, incx_, incy_;
|
int32_t n_, incx_, incy_;
|
||||||
@ -100,8 +100,8 @@ void gpu_ddot(const void* handle, const int64_t n, const double* x, const int64_
|
|||||||
|
|
||||||
float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy);
|
float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
|
void gpu_sdot(void** handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
|
||||||
assert (handle != NULL);
|
assert (*handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
int32_t n_, incx_, incy_;
|
int32_t n_, incx_, incy_;
|
||||||
@ -122,10 +122,10 @@ void gpu_sdot(const void* handle, const int64_t n, const float* x, const int64_t
|
|||||||
void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
|
void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
|
||||||
const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);
|
const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const double alpha,
|
void gpu_dgemv(void** handle, const char transa, const int64_t m, const int64_t n, const double alpha,
|
||||||
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) {
|
const double* a, const int64_t lda, const double* x, const int64_t incx, const double beta, double* y, const int64_t incy) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (*handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
int32_t m_, n_, lda_, incx_, incy_;
|
int32_t m_, n_, lda_, incx_, incy_;
|
||||||
@ -150,10 +150,10 @@ void gpu_dgemv(const void* handle, const char transa, const int64_t m, const int
|
|||||||
void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
|
void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
|
||||||
const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);
|
const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);
|
||||||
|
|
||||||
void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int64_t n, const float alpha,
|
void gpu_sgemv(void** handle, const char transa, const int64_t m, const int64_t n, const float alpha,
|
||||||
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) {
|
const float* a, const int64_t lda, const float* x, const int64_t incx, const float beta, float* y, const int64_t incy) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (*handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
int32_t m_, n_, lda_, incx_, incy_;
|
int32_t m_, n_, lda_, incx_, incy_;
|
||||||
@ -178,10 +178,10 @@ void gpu_sgemv(const void* handle, const char transa, const int64_t m, const int
|
|||||||
void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
|
void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
|
||||||
const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);
|
const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);
|
||||||
|
|
||||||
void gpu_dgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha,
|
void gpu_dgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const double alpha,
|
||||||
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) {
|
const double* a, const int64_t lda, const double* b, const int64_t ldb, const double beta, double* c, const int64_t ldc) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (*handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
int32_t m_, n_, k_, lda_, ldb_, ldc_;
|
int32_t m_, n_, k_, lda_, ldb_, ldc_;
|
||||||
@ -209,10 +209,10 @@ void gpu_dgemm(const void* handle, const char transa, const char transb, const i
|
|||||||
void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
|
void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
|
||||||
const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);
|
const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);
|
||||||
|
|
||||||
void gpu_sgemm(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha,
|
void gpu_sgemm(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const int64_t k, const float alpha,
|
||||||
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) {
|
const float* a, const int64_t lda, const float* b, const int64_t ldb, const float beta, float* c, const int64_t ldc) {
|
||||||
|
|
||||||
assert (handle != NULL);
|
assert (*handle != NULL);
|
||||||
|
|
||||||
/* Convert to int32_t */
|
/* Convert to int32_t */
|
||||||
int32_t m_, n_, k_, lda_, ldb_, ldc_;
|
int32_t m_, n_, k_, lda_, ldb_, ldc_;
|
||||||
@ -236,12 +236,9 @@ void gpu_sgemm(const void* handle, const char transa, const char transb, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void gpu_dgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha,
|
void gpu_dgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const double alpha,
|
||||||
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
|
const double* a, const int64_t lda, const double beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
|
||||||
if (handle == NULL) {
|
assert (*handle != NULL);
|
||||||
perror("NULL handle");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( (transa == 'N' && transb == 'N') ||
|
if ( (transa == 'N' && transb == 'N') ||
|
||||||
(transa == 'n' && transb == 'N') ||
|
(transa == 'n' && transb == 'N') ||
|
||||||
@ -371,12 +368,9 @@ void gpu_dgeam(const void* handle, const char transa, const char transb, const i
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void gpu_sgeam(const void* handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha,
|
void gpu_sgeam(void** handle, const char transa, const char transb, const int64_t m, const int64_t n, const float alpha,
|
||||||
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
|
const float* a, const int64_t lda, const float beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
|
||||||
if (handle == NULL) {
|
assert (*handle != NULL);
|
||||||
perror("NULL handle");
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( (transa == 'N' && transb == 'N') ||
|
if ( (transa == 'N' && transb == 'N') ||
|
||||||
(transa == 'n' && transb == 'N') ||
|
(transa == 'n' && transb == 'N') ||
|
||||||
|
Loading…
Reference in New Issue
Block a user