1
0
mirror of https://gitlab.com/scemama/qp_plugins_scemama.git synced 2024-12-22 04:13:40 +01:00

Compare commits

...

2 Commits

Author SHA1 Message Date
4d2adddbd9 Fix CCSD GPU 2023-12-06 21:43:47 +01:00
b557b57a3b Added gpu_malloc in gpu_dgemm 2023-12-06 21:40:20 +01:00

View File

@ -5,6 +5,7 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <assert.h> #include <assert.h>
#include "gpu.h"
#define BLOCK_SIZE 16 #define BLOCK_SIZE 16
@ -27,13 +28,13 @@ void gpu_dgemm(char transa, char transb, int m, int n, int k, double alpha,
cublasOperation_t ta, tb; cublasOperation_t ta, tb;
if (transa == 'N') { if (transa == 'N') {
cudaStat = cudaMalloc((void**)&d_A, (size_t) lda*k*sizeof(double)); cudaStat = gpu_malloc((void**)&d_A, (size_t) lda*k*sizeof(double));
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
cudaStat = cublasSetMatrix(m, k, sizeof(double), A, lda, d_A, lda); cudaStat = cublasSetMatrix(m, k, sizeof(double), A, lda, d_A, lda);
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
ta = CUBLAS_OP_N; ta = CUBLAS_OP_N;
} else { } else {
cudaStat = cudaMalloc((void**)&d_A, (size_t) lda*m*sizeof(double)); cudaStat = gpu_malloc((void**)&d_A, (size_t) lda*m*sizeof(double));
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
cudaStat = cublasSetMatrix(k, m, sizeof(double), A, lda, d_A, lda); cudaStat = cublasSetMatrix(k, m, sizeof(double), A, lda, d_A, lda);
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
@ -41,20 +42,20 @@ void gpu_dgemm(char transa, char transb, int m, int n, int k, double alpha,
} }
if (transb == 'N') { if (transb == 'N') {
cudaStat = cudaMalloc((void**)&d_B, (size_t) ldb*n*sizeof(double)); cudaStat = gpu_malloc((void**)&d_B, (size_t) ldb*n*sizeof(double));
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
cudaStat = cublasSetMatrix(k, n, sizeof(double), B, ldb, d_B, ldb); cudaStat = cublasSetMatrix(k, n, sizeof(double), B, ldb, d_B, ldb);
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
tb = CUBLAS_OP_N; tb = CUBLAS_OP_N;
} else { } else {
cudaStat = cudaMalloc((void**)&d_B, (size_t) ldb*k*sizeof(double)); cudaStat = gpu_malloc((void**)&d_B, (size_t) ldb*k*sizeof(double));
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
cudaStat = cublasSetMatrix(n, k, sizeof(double), B, ldb, d_B, ldb); cudaStat = cublasSetMatrix(n, k, sizeof(double), B, ldb, d_B, ldb);
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
tb = CUBLAS_OP_T; tb = CUBLAS_OP_T;
} }
cudaStat = cudaMalloc((void**)&d_C, (size_t) ldc*n*sizeof(double)); cudaStat = gpu_malloc((void**)&d_C, (size_t) ldc*n*sizeof(double));
assert(cudaStat == cudaSuccess); assert(cudaStat == cudaSuccess);
if (beta != 0.) { if (beta != 0.) {
cudaStat = cublasSetMatrix(m, n, sizeof(double), C, ldc, d_C, ldc); cudaStat = cublasSetMatrix(m, n, sizeof(double), C, ldc, d_C, ldc);