mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2025-01-03 01:55:52 +01:00
Compare commits
2 Commits
29903111f4
...
4d2adddbd9
Author | SHA1 | Date | |
---|---|---|---|
4d2adddbd9 | |||
b557b57a3b |
@ -5,6 +5,7 @@
|
|||||||
#include <cublas_v2.h>
|
#include <cublas_v2.h>
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include "gpu.h"
|
||||||
|
|
||||||
#define BLOCK_SIZE 16
|
#define BLOCK_SIZE 16
|
||||||
|
|
||||||
@ -27,13 +28,13 @@ void gpu_dgemm(char transa, char transb, int m, int n, int k, double alpha,
|
|||||||
cublasOperation_t ta, tb;
|
cublasOperation_t ta, tb;
|
||||||
|
|
||||||
if (transa == 'N') {
|
if (transa == 'N') {
|
||||||
cudaStat = cudaMalloc((void**)&d_A, (size_t) lda*k*sizeof(double));
|
cudaStat = gpu_malloc((void**)&d_A, (size_t) lda*k*sizeof(double));
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
cudaStat = cublasSetMatrix(m, k, sizeof(double), A, lda, d_A, lda);
|
cudaStat = cublasSetMatrix(m, k, sizeof(double), A, lda, d_A, lda);
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
ta = CUBLAS_OP_N;
|
ta = CUBLAS_OP_N;
|
||||||
} else {
|
} else {
|
||||||
cudaStat = cudaMalloc((void**)&d_A, (size_t) lda*m*sizeof(double));
|
cudaStat = gpu_malloc((void**)&d_A, (size_t) lda*m*sizeof(double));
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
cudaStat = cublasSetMatrix(k, m, sizeof(double), A, lda, d_A, lda);
|
cudaStat = cublasSetMatrix(k, m, sizeof(double), A, lda, d_A, lda);
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
@ -41,20 +42,20 @@ void gpu_dgemm(char transa, char transb, int m, int n, int k, double alpha,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (transb == 'N') {
|
if (transb == 'N') {
|
||||||
cudaStat = cudaMalloc((void**)&d_B, (size_t) ldb*n*sizeof(double));
|
cudaStat = gpu_malloc((void**)&d_B, (size_t) ldb*n*sizeof(double));
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
cudaStat = cublasSetMatrix(k, n, sizeof(double), B, ldb, d_B, ldb);
|
cudaStat = cublasSetMatrix(k, n, sizeof(double), B, ldb, d_B, ldb);
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
tb = CUBLAS_OP_N;
|
tb = CUBLAS_OP_N;
|
||||||
} else {
|
} else {
|
||||||
cudaStat = cudaMalloc((void**)&d_B, (size_t) ldb*k*sizeof(double));
|
cudaStat = gpu_malloc((void**)&d_B, (size_t) ldb*k*sizeof(double));
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
cudaStat = cublasSetMatrix(n, k, sizeof(double), B, ldb, d_B, ldb);
|
cudaStat = cublasSetMatrix(n, k, sizeof(double), B, ldb, d_B, ldb);
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
tb = CUBLAS_OP_T;
|
tb = CUBLAS_OP_T;
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaStat = cudaMalloc((void**)&d_C, (size_t) ldc*n*sizeof(double));
|
cudaStat = gpu_malloc((void**)&d_C, (size_t) ldc*n*sizeof(double));
|
||||||
assert(cudaStat == cudaSuccess);
|
assert(cudaStat == cudaSuccess);
|
||||||
if (beta != 0.) {
|
if (beta != 0.) {
|
||||||
cudaStat = cublasSetMatrix(m, n, sizeof(double), C, ldc, d_C, ldc);
|
cudaStat = cublasSetMatrix(m, n, sizeof(double), C, ldc, d_C, ldc);
|
||||||
|
Loading…
Reference in New Issue
Block a user