diff --git a/independent_test_harness/cblasdgemm_vs_cublasdgemm_test.c b/independent_test_harness/cblasdgemm_vs_cublasdgemm_test.c deleted file mode 100644 index bee638c..0000000 --- a/independent_test_harness/cblasdgemm_vs_cublasdgemm_test.c +++ /dev/null @@ -1,110 +0,0 @@ -/* -Compile with: - -nvc \ --I$NV_CUDA_MATH_PATH/11.7/include \ --L$NV_CUDA_MATH_PATH/11.7/lib64 \ --L${MKLROOT}/lib/intel64 \ --lmkl_intel_lp64 \ --lmkl_sequential \ --lmkl_core \ --lpthread \ --lm \ --ldl \ --lcublas \ --mp \ --target=gpu \ -cblasdgemm_vs_cublasdgemm_test.c \ --o cblasdgemm_vs_cublasdgemm_test - -*/ - -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include "debug.h" - -int main() { - - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { - fprintf(stdout, "cuBLAS initialization failed!\n"); - exit(EXIT_FAILURE); - } - - uint16_t M = 3; - uint16_t N = 2; - uint16_t K = 4; - - double *a = malloc(M * K * sizeof(double)); // M x K = 3 x 4 - double *acm = malloc(M * K * sizeof(double)); // col-major stored - double *b = malloc(K * N * sizeof(double)); // K x N = 4 x 2 - double *bcm = malloc(K * N * sizeof(double)); // col-major stored - double *c = malloc(M * N * sizeof(double)); // M x N = 3 x 2 - - a[0] = 1, a[1] = 2, a[2] = 3, a[3] = 4, a[4] = 5, a[5] = 6, a[6] = 7, a[7] = 8; a[8] = 9, a[9] = 10, a[10] = 11, a[11] = 12; - acm[0] = 1, acm[1] = 5, acm[2] = 9, acm[3] = 2, acm[4] = 6, acm[5] = 10, acm[6] = 3, acm[7] = 7; acm[8] = 11, acm[9] = 4, acm[10] = 8, acm[11] = 12; - b[0] = 13, b[1] = 14, b[2] = 15, b[3] = 16, b[4] = 17, b[5] = 18, b[6] = 19, b[7] = 20; - bcm[0] = 13, bcm[1] = 15, bcm[2] = 17, bcm[3] = 19, bcm[4] = 14, bcm[5] = 16, bcm[6] = 18, bcm[7] = 20; - - uint16_t lda = K; - uint16_t ldacm = M; - uint16_t ldb = N; - uint16_t ldbcm = K; - uint16_t ldc = N; - - double alpha = 1.0, beta = 0.0; - - - cblas_dgemm(CblasRowMajor, - CblasNoTrans, CblasNoTrans, - M, N, K, - alpha, a, lda, b, ldb, - beta, c, ldc); - print_m(c, M, N, ldc, "c_cblas_dgemm"); - - - memset(c, 0, M*N*sizeof(double)); - #pragma omp target enter data map(to:a[0:M*K], b[0:K*N], c[0:M*N]) - #pragma omp target data use_device_ptr(a, b, c) - { - int cublasError = cublasDgemm(handle, - CUBLAS_OP_N, CUBLAS_OP_N, - N, M, K, - &alpha, b, ldb, a, lda, - &beta, c, ldc); - } - #pragma omp target exit data map(from:c[0:M*N]) - print_m(c, M, N, ldc, "c_cublasDgemm"); - - - memset(c, 0, M*N*sizeof(double)); - ldc = M; // ldc : N -> M, because cublasDgemm stores result in col-maj - #pragma omp target enter data map(to:acm[0:M*K], bcm[0:K*N], c[0:M*N]) - #pragma omp target data use_device_ptr(acm, bcm, c) - { - int cublasError = cublasDgemm(handle, - CUBLAS_OP_N, CUBLAS_OP_N, - M, N, K, - &alpha, acm, ldacm, bcm, ldbcm, - &beta, c, ldc); - } - #pragma omp target exit data map(from:c[0:M*N]) - print_m_t(c, M, N, ldc, "c_col-maj_cublasDgemm"); - - - free(a); - free(acm); - free(b); - free(bcm); - free(c); - return 0; -}