/*
 * qp2/plugins/local/gpu_x86/gpu.c
 * Mirror of https://github.com/QuantumPackage/qp2.git (synced 2024-11-14 01:13:38 +01:00)
 * 503 lines, 13 KiB, C
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2024-06-29 02:27:50 +02:00
#include <stdbool.h>
2024-06-25 18:32:44 +02:00
#include <assert.h>
/* Generic functions */
/* Return the number of available devices.
 * This implementation always reports 0. */
int gpu_ndevices(void) {
  return 0;
}
/* Select the device with index `i`.
 * Nothing to select in this implementation: the argument is ignored. */
void gpu_set_device(int32_t i) {
  (void) i;
}
/* Allocation functions */
/* Allocate `n` bytes with malloc and store the address in `*ptr`.
 *
 * On failure `*ptr` is set to NULL and a diagnostic is printed on stderr;
 * the function itself does not abort, so callers must check `*ptr`.
 * A negative `n` (or one that does not fit in size_t) is rejected up front
 * instead of being silently converted to a huge unsigned size.
 * A zero-byte request is not reported as an error even if malloc returns NULL. */
void gpu_allocate(void** ptr, const int64_t n) {
  if (n < 0 || (uint64_t) n > SIZE_MAX) {
    *ptr = NULL;
    fprintf(stderr, "Allocation failed: invalid size\n");
    return;
  }

  *ptr = malloc((size_t) n);
  if (*ptr == NULL && n > 0) {
    perror("Allocation failed");
  }
}
2024-06-27 12:06:06 +02:00
/* Release the buffer referenced by `*ptr` and reset `*ptr` to NULL,
 * so that a second call on the same variable is a harmless free(NULL). */
void gpu_deallocate(void** ptr) {
  free(*ptr);
  *ptr = NULL;
}
/* Memory transfer functions */
/* Copy `n` bytes from `cpu_ptr` to `gpu_ptr` (plain memcpy here).
 * The regions must not overlap. A non-positive `n` copies nothing: this
 * avoids converting a negative count to a huge size_t, and avoids calling
 * memcpy with possibly-NULL pointers for an empty transfer. */
void gpu_upload(const void* cpu_ptr, void* gpu_ptr, const int64_t n) {
  if (n <= 0) {
    return;
  }
  memcpy(gpu_ptr, cpu_ptr, (size_t) n);
}
/* Copy `n` bytes from `gpu_ptr` to `cpu_ptr` (plain memcpy here).
 * The regions must not overlap. A non-positive `n` copies nothing: this
 * avoids converting a negative count to a huge size_t, and avoids calling
 * memcpy with possibly-NULL pointers for an empty transfer. */
void gpu_download(const void* gpu_ptr, void* cpu_ptr, const int64_t n) {
  if (n <= 0) {
    return;
  }
  memcpy(cpu_ptr, gpu_ptr, (size_t) n);
}
/* Copy `n` bytes from `gpu_ptr_src` to `gpu_ptr_dest` (plain memcpy here).
 * The regions must not overlap. A non-positive `n` copies nothing: this
 * avoids converting a negative count to a huge size_t, and avoids calling
 * memcpy with possibly-NULL pointers for an empty transfer. */
void gpu_copy(const void* gpu_ptr_src, void* gpu_ptr_dest, const int64_t n) {
  if (n <= 0) {
    return;
  }
  memcpy(gpu_ptr_dest, gpu_ptr_src, (size_t) n);
}
/* Streams */
/* Create a stream object and store it in `*ptr`.
 * Here a stream is just a one-byte heap allocation acting as a unique,
 * non-NULL token; release it with gpu_stream_destroy.
 * On allocation failure `*ptr` is NULL and a diagnostic is printed. */
void gpu_stream_create(void** ptr) {
  *ptr = malloc(sizeof(char));
  if (*ptr == NULL) {
    perror("Allocation failed");
  }
}
/* Free the stream token referenced by `*ptr` and reset `*ptr` to NULL. */
void gpu_stream_destroy(void** ptr) {
  free(*ptr);
  *ptr = NULL;
}
2024-06-29 02:27:50 +02:00
/* Attach `stream` to the BLAS `handle`.
 * This implementation has nothing to attach: both arguments are ignored. */
void gpu_set_stream(void* handle, void* stream) {
  (void) handle;
  (void) stream;
}
/* Wait for pending work to complete.
 * Every operation in this file runs to completion before returning,
 * so there is nothing to wait for. */
void gpu_synchronize() {
  /* intentionally empty */
}
/* BLAS functions */
/* Create a BLAS handle and store it in `*handle`.
 * Here the handle is just a one-byte heap allocation acting as a non-NULL
 * token (the gpu_* BLAS wrappers in this file assert handle != NULL);
 * release it with gpu_blas_destroy.
 * On allocation failure `*handle` is NULL and a diagnostic is printed. */
void gpu_blas_create(void** handle) {
  *handle = malloc(sizeof(char));
  if (*handle == NULL) {
    perror("Allocation failed");
  }
}
/* Free the BLAS handle token referenced by `*handle` and reset it to NULL. */
void gpu_blas_destroy(void** handle) {
  free(*handle);
  *handle = NULL;
}
/* External routine defined outside this file (presumably the Fortran BLAS
 * DDOT symbol -- confirm against the link line). Takes 32-bit integers by
 * reference. */
double ddot_(const int32_t* n, const double* x, const int32_t* incx, const double* y, const int32_t* incy);

/* Double-precision dot product: forwards to ddot_ and stores the value in
 * `*result`.
 *
 * `handle` must be non-NULL (checked with assert). The 64-bit sizes and
 * strides are narrowed to the 32-bit integers ddot_ expects; each narrowing
 * is followed by a round-trip assert so a value that does not fit is caught
 * (only when assertions are enabled, i.e. NDEBUG is not defined). */
void gpu_ddot(void* handle, const int64_t n, const double* x, const int64_t incx, const double* y, const int64_t incy, double* result) {
  assert (handle != NULL);

  const int32_t n_ = (int32_t) n;
  assert ( (int64_t) n_ == n );

  const int32_t incx_ = (int32_t) incx;
  assert ( (int64_t) incx_ == incx );

  const int32_t incy_ = (int32_t) incy;
  assert ( (int64_t) incy_ == incy );

  *result = ddot_(&n_, x, &incx_, y, &incy_);
}
/* External routine defined outside this file (presumably the Fortran BLAS
 * SDOT symbol -- confirm against the link line). Takes 32-bit integers by
 * reference. */
float sdot_(const int32_t* n, const float* x, const int32_t* incx, const float* y, const int32_t* incy);

/* Single-precision dot product: forwards to sdot_ and stores the value in
 * `*result`.
 *
 * `handle` must be non-NULL (checked with assert). The 64-bit sizes and
 * strides are narrowed to the 32-bit integers sdot_ expects; each narrowing
 * is followed by a round-trip assert so a value that does not fit is caught
 * (only when assertions are enabled, i.e. NDEBUG is not defined). */
void gpu_sdot(void* handle, const int64_t n, const float* x, const int64_t incx, const float* y, const int64_t incy, float* result) {
  assert (handle != NULL);

  const int32_t n_ = (int32_t) n;
  assert ( (int64_t) n_ == n );

  const int32_t incx_ = (int32_t) incx;
  assert ( (int64_t) incx_ == incx );

  const int32_t incy_ = (int32_t) incy;
  assert ( (int64_t) incy_ == incy );

  *result = sdot_(&n_, x, &incx_, y, &incy_);
}
/* External routine defined outside this file (presumably the Fortran BLAS
 * DGEMV symbol -- confirm against the link line). Takes 32-bit integers by
 * reference. */
void dgemv_(const char* transa, const int32_t* m, const int32_t* n, const double* alpha,
            const double* a, const int32_t* lda, const double* x, const int32_t* incx, const double* beta, double* y, const int32_t* incy);

/* Double-precision matrix-vector product: forwards all arguments to dgemv_.
 *
 * `handle` must be non-NULL (checked with assert). The 64-bit dimensions,
 * leading dimension and strides are narrowed to the 32-bit integers dgemv_
 * expects; each narrowing is followed by a round-trip assert so a value that
 * does not fit is caught (only when NDEBUG is not defined). */
void gpu_dgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const double* alpha,
               const double* a, const int64_t lda, const double* x, const int64_t incx, const double* beta, double* y, const int64_t incy) {
  assert (handle != NULL);

  const int32_t m_ = (int32_t) m;
  assert ( (int64_t) m_ == m );

  const int32_t n_ = (int32_t) n;
  assert ( (int64_t) n_ == n );

  const int32_t lda_ = (int32_t) lda;
  assert ( (int64_t) lda_ == lda );

  const int32_t incx_ = (int32_t) incx;
  assert ( (int64_t) incx_ == incx );

  const int32_t incy_ = (int32_t) incy;
  assert ( (int64_t) incy_ == incy );

  dgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
}
/* External routine defined outside this file (presumably the Fortran BLAS
 * SGEMV symbol -- confirm against the link line). Takes 32-bit integers by
 * reference. */
void sgemv_(const char* transa, const int32_t* m, const int32_t* n, const float* alpha,
            const float* a, const int32_t* lda, const float* x, const int32_t* incx, const float* beta, float* y, const int32_t* incy);

/* Single-precision matrix-vector product: forwards all arguments to sgemv_.
 *
 * `handle` must be non-NULL (checked with assert). The 64-bit dimensions,
 * leading dimension and strides are narrowed to the 32-bit integers sgemv_
 * expects; each narrowing is followed by a round-trip assert so a value that
 * does not fit is caught (only when NDEBUG is not defined). */
void gpu_sgemv(void* handle, const char* transa, const int64_t m, const int64_t n, const float* alpha,
               const float* a, const int64_t lda, const float* x, const int64_t incx, const float* beta, float* y, const int64_t incy) {
  assert (handle != NULL);

  const int32_t m_ = (int32_t) m;
  assert ( (int64_t) m_ == m );

  const int32_t n_ = (int32_t) n;
  assert ( (int64_t) n_ == n );

  const int32_t lda_ = (int32_t) lda;
  assert ( (int64_t) lda_ == lda );

  const int32_t incx_ = (int32_t) incx;
  assert ( (int64_t) incx_ == incx );

  const int32_t incy_ = (int32_t) incy;
  assert ( (int64_t) incy_ == incy );

  sgemv_(transa, &m_, &n_, alpha, a, &lda_, x, &incx_, beta, y, &incy_);
}
/* External routine defined outside this file (presumably the Fortran BLAS
 * DGEMM symbol -- confirm against the link line). Takes 32-bit integers by
 * reference. */
void dgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const double* alpha,
            const double* a, const int32_t* lda, const double* b, const int32_t* ldb, const double* beta, double* c, const int32_t* ldc);

/* Double-precision matrix-matrix product: forwards all arguments to dgemm_.
 *
 * `handle` must be non-NULL (checked with assert). The 64-bit dimensions and
 * leading dimensions are narrowed to the 32-bit integers dgemm_ expects; each
 * narrowing is followed by a round-trip assert so a value that does not fit
 * is caught (only when NDEBUG is not defined). */
void gpu_dgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const double* alpha,
               const double* a, const int64_t lda, const double* b, const int64_t ldb, const double* beta, double* c, const int64_t ldc) {
  assert (handle != NULL);

  const int32_t m_ = (int32_t) m;
  assert ( (int64_t) m_ == m );

  const int32_t n_ = (int32_t) n;
  assert ( (int64_t) n_ == n );

  const int32_t k_ = (int32_t) k;
  assert ( (int64_t) k_ == k );

  const int32_t lda_ = (int32_t) lda;
  assert ( (int64_t) lda_ == lda );

  const int32_t ldb_ = (int32_t) ldb;
  assert ( (int64_t) ldb_ == ldb );

  const int32_t ldc_ = (int32_t) ldc;
  assert ( (int64_t) ldc_ == ldc );

  dgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
}
/* External routine defined outside this file (presumably the Fortran BLAS
 * SGEMM symbol -- confirm against the link line). Takes 32-bit integers by
 * reference. */
void sgemm_(const char* transa, const char* transb, const int32_t* m, const int32_t* n, const int32_t* k, const float* alpha,
            const float* a, const int32_t* lda, const float* b, const int32_t* ldb, const float* beta, float* c, const int32_t* ldc);

/* Single-precision matrix-matrix product: forwards all arguments to sgemm_.
 *
 * `handle` must be non-NULL (checked with assert). The 64-bit dimensions and
 * leading dimensions are narrowed to the 32-bit integers sgemm_ expects; each
 * narrowing is followed by a round-trip assert so a value that does not fit
 * is caught (only when NDEBUG is not defined). */
void gpu_sgemm(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const int64_t k, const float* alpha,
               const float* a, const int64_t lda, const float* b, const int64_t ldb, const float* beta, float* c, const int64_t ldc) {
  assert (handle != NULL);

  const int32_t m_ = (int32_t) m;
  assert ( (int64_t) m_ == m );

  const int32_t n_ = (int32_t) n;
  assert ( (int64_t) n_ == n );

  const int32_t k_ = (int32_t) k;
  assert ( (int64_t) k_ == k );

  const int32_t lda_ = (int32_t) lda;
  assert ( (int64_t) lda_ == lda );

  const int32_t ldb_ = (int32_t) ldb;
  assert ( (int64_t) ldb_ == ldb );

  const int32_t ldc_ = (int32_t) ldc;
  assert ( (int64_t) ldc_ == ldc );

  sgemm_(transa, transb, &m_, &n_, &k_, alpha, a, &lda_, b, &ldb_, beta, c, &ldc_);
}
2024-07-03 18:24:13 +02:00
/* Double-precision matrix add/transpose:
 *
 *     C = alpha * op(A) + beta * op(B)
 *
 * C is m x n with leading dimension ldc; element (i,j) lives at c[j*ldc + i].
 * op(X) is X when the first character of the matching trans argument is
 * 'N'/'n', and the transpose of X when it is 'T'/'t'. Any other value makes
 * the call a no-op (C is left untouched).
 *
 * `handle` must be non-NULL (checked with assert).
 * When *alpha == 0 the array `a` is never read, and when *beta == 0 the
 * array `b` is never read, so the corresponding pointer may be invalid;
 * when both are zero C is simply filled with zeros. */
void gpu_dgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const double* alpha,
               const double* a, const int64_t lda, const double* beta, const double* b, const int64_t ldb, double* c, const int64_t ldc) {
  assert (handle != NULL);

  const bool a_plain = (*transa == 'N' || *transa == 'n');
  const bool a_trans = (*transa == 'T' || *transa == 't');
  const bool b_plain = (*transb == 'N' || *transb == 'n');
  const bool b_trans = (*transb == 'T' || *transb == 't');

  /* Unrecognized operation code: nothing is computed. */
  if (!(a_plain || a_trans) || !(b_plain || b_trans)) {
    return;
  }

  /* Strides such that op(A)(i,j) = a[i*a_is + j*a_js]; a transpose merely
   * swaps the roles of the row and column strides. Same for B. */
  const int64_t a_is = a_plain ? 1   : lda;
  const int64_t a_js = a_plain ? lda : 1;
  const int64_t b_is = b_plain ? 1   : ldb;
  const int64_t b_js = b_plain ? ldb : 1;

  const double alpha_ = *alpha;
  const double beta_  = *beta;

  if (alpha_ == 0. && beta_ == 0.) {
    /* Neither input contributes: do not touch a or b at all. */
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = 0.;
      }
    }
  } else if (alpha_ == 0.) {
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = beta_ * b[i*b_is + j*b_js];
      }
    }
  } else if (beta_ == 0.) {
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = alpha_ * a[i*a_is + j*a_js];
      }
    }
  } else {
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = alpha_ * a[i*a_is + j*a_js] + beta_ * b[i*b_is + j*b_js];
      }
    }
  }
}
2024-07-03 18:24:13 +02:00
/* Single-precision matrix add/transpose:
 *
 *     C = alpha * op(A) + beta * op(B)
 *
 * C is m x n with leading dimension ldc; element (i,j) lives at c[j*ldc + i].
 * op(X) is X when the first character of the matching trans argument is
 * 'N'/'n', and the transpose of X when it is 'T'/'t'. Any other value makes
 * the call a no-op (C is left untouched).
 *
 * `handle` must be non-NULL (checked with assert).
 * When *alpha == 0 the array `a` is never read, and when *beta == 0 the
 * array `b` is never read, so the corresponding pointer may be invalid;
 * when both are zero C is simply filled with zeros. */
void gpu_sgeam(void* handle, const char* transa, const char* transb, const int64_t m, const int64_t n, const float* alpha,
               const float* a, const int64_t lda, const float* beta, const float* b, const int64_t ldb, float* c, const int64_t ldc) {
  assert (handle != NULL);

  const bool a_plain = (*transa == 'N' || *transa == 'n');
  const bool a_trans = (*transa == 'T' || *transa == 't');
  const bool b_plain = (*transb == 'N' || *transb == 'n');
  const bool b_trans = (*transb == 'T' || *transb == 't');

  /* Unrecognized operation code: nothing is computed. */
  if (!(a_plain || a_trans) || !(b_plain || b_trans)) {
    return;
  }

  /* Strides such that op(A)(i,j) = a[i*a_is + j*a_js]; a transpose merely
   * swaps the roles of the row and column strides. Same for B. */
  const int64_t a_is = a_plain ? 1   : lda;
  const int64_t a_js = a_plain ? lda : 1;
  const int64_t b_is = b_plain ? 1   : ldb;
  const int64_t b_js = b_plain ? ldb : 1;

  const float alpha_ = *alpha;
  const float beta_  = *beta;

  if (alpha_ == 0.f && beta_ == 0.f) {
    /* Neither input contributes: do not touch a or b at all. */
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = 0.f;
      }
    }
  } else if (alpha_ == 0.f) {
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = beta_ * b[i*b_is + j*b_js];
      }
    }
  } else if (beta_ == 0.f) {
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = alpha_ * a[i*a_is + j*a_js];
      }
    }
  } else {
    for (int64_t j=0 ; j<n ; ++j) {
      for (int64_t i=0 ; i<m ; ++i) {
        c[j*ldc+i] = alpha_ * a[i*a_is + j*a_js] + beta_ * b[i*b_is + j*b_js];
      }
    }
  }
}