2021-04-23 11:25:45 +02:00
|
|
|
#+TITLE: QMCkl dgemm
|
|
|
|
|
|
|
|
#+NAME: header
|
|
|
|
#+BEGIN_SRC text
|
|
|
|
Generated from qmckl_dgemm.org
|
|
|
|
#+END_SRC
|
|
|
|
|
|
|
|
* Fortran interface
|
|
|
|
|
|
|
|
#+BEGIN_SRC f90 :noweb yes :tangle qmckl_blas_f.f90
|
|
|
|
! <<header>>
|
|
|
|
|
|
|
|
module qmckl_blas
|
|
|
|
use :: iso_c_binding
|
|
|
|
|
|
|
|
interface
|
|
|
|
subroutine qmckl_dgemm(transa, transb, m, n, k, &
|
|
|
|
alpha, A, lda, B, ldb, beta, C, ldc) bind(C)
|
|
|
|
use :: iso_c_binding
|
|
|
|
implicit none
|
|
|
|
character(kind=c_char ), value :: transa, transb
|
|
|
|
integer (kind=c_int ), value :: m, n, k, lda, ldb, ldc
|
|
|
|
real (kind=c_double), value :: alpha, beta
|
|
|
|
real (kind=c_double) :: A(lda,*), B(ldb,*), C(ldc,*)
|
|
|
|
end subroutine qmckl_dgemm
|
|
|
|
end interface
|
|
|
|
|
|
|
|
end module qmckl_blas
|
|
|
|
#+END_SRC
|
|
|
|
|
2021-04-23 14:18:59 +02:00
|
|
|
* TODO C code
|
2021-04-23 11:25:45 +02:00
|
|
|
To avoid passing too many arguments to recursive subroutines, we put
|
|
|
|
all the arguments in a struct.
|
|
|
|
|
|
|
|
#+NAME: dgemm_args
|
|
|
|
#+BEGIN_SRC c
|
|
|
|
struct dgemm_args {
|
2021-04-23 14:18:59 +02:00
|
|
|
double alpha;
|
2021-04-23 11:25:45 +02:00
|
|
|
double beta;
|
|
|
|
double* A;
|
|
|
|
double* B;
|
|
|
|
double* C;
|
|
|
|
int m;
|
|
|
|
int n;
|
|
|
|
int k;
|
|
|
|
int lda;
|
|
|
|
int ldb;
|
|
|
|
int ldc;
|
|
|
|
CBLAS_LAYOUT transa;
|
|
|
|
CBLAS_LAYOUT transb;
|
2021-04-23 14:18:59 +02:00
|
|
|
};
|
|
|
|
|
2021-04-23 11:25:45 +02:00
|
|
|
#+END_SRC
|
|
|
|
|
|
|
|
The driver routine packs the arguments in the struct and calls the
|
|
|
|
recursive routine.
|
|
|
|
|
|
|
|
#+NAME: dgemm
|
2021-04-23 14:18:59 +02:00
|
|
|
#+BEGIN_SRC c
|
2021-04-23 11:25:45 +02:00
|
|
|
void qmckl_dgemm(char transa, char transb,
|
|
|
|
int m, int n, int k,
|
2021-04-23 14:18:59 +02:00
|
|
|
double alpha,
|
2021-04-23 11:25:45 +02:00
|
|
|
double* A, int lda,
|
|
|
|
double* B, int ldb,
|
|
|
|
double beta,
|
|
|
|
double* C, int ldc)
|
|
|
|
{
|
2021-04-23 14:18:59 +02:00
|
|
|
struct dgemm_args args;
|
2021-04-23 11:25:45 +02:00
|
|
|
|
|
|
|
args.alpha = alpha;
|
|
|
|
args.beta = beta ;
|
|
|
|
args.A = A;
|
|
|
|
args.B = B;
|
|
|
|
args.C = C;
|
|
|
|
args.m = m;
|
|
|
|
args.n = n;
|
|
|
|
args.k = k;
|
|
|
|
args.lda = lda;
|
|
|
|
args.ldb = ldb;
|
|
|
|
args.ldc = ldc;
|
|
|
|
|
|
|
|
if (transa == 'T' || transa == 't') {
|
2021-04-23 14:18:59 +02:00
|
|
|
args.transa = CblasTrans;
|
2021-04-23 11:25:45 +02:00
|
|
|
} else {
|
2021-04-23 14:18:59 +02:00
|
|
|
args.transa = CblasNoTrans;
|
2021-04-23 11:25:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
CBLAS_LAYOUT tb;
|
|
|
|
if (transa == 'T' || transa == 't') {
|
2021-04-23 14:18:59 +02:00
|
|
|
args.transb = CblasTrans;
|
2021-04-23 11:25:45 +02:00
|
|
|
} else {
|
2021-04-23 14:18:59 +02:00
|
|
|
args.transb = CblasNoTrans;
|
2021-04-23 11:25:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
qmckl_dgemm_rec(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
#+END_SRC
|
|
|
|
|
|
|
|
|
|
|
|
#+NAME: dgemm_rec
|
2021-04-23 14:18:59 +02:00
|
|
|
#+BEGIN_SRC c
|
|
|
|
#define MIN_SIZE 512
|
|
|
|
#include<stdio.h>
|
|
|
|
|
2021-04-23 11:25:45 +02:00
|
|
|
static void qmckl_dgemm_rec(struct dgemm_args args) {
|
|
|
|
|
2021-04-23 14:18:59 +02:00
|
|
|
// printf("%5d %5d\n", args.m, args.n);
|
|
|
|
|
|
|
|
if ( (args.m <= MIN_SIZE) || (args.n <= MIN_SIZE)) {
|
|
|
|
cblas_dgemm(CblasColMajor, args.transa, args.transb,
|
|
|
|
args.m, args.n, args.k, args.alpha,
|
|
|
|
args.A, args.lda, args.B, args.ldb,
|
|
|
|
args.beta, args.C, args.ldc);
|
|
|
|
} else {
|
|
|
|
|
|
|
|
int m1 = args.m / 2;
|
|
|
|
int m2 = args.m - m1;
|
|
|
|
int n1 = args.n / 2;
|
|
|
|
int n2 = args.n - n1;
|
|
|
|
|
|
|
|
struct dgemm_args args_1 = args;
|
|
|
|
args_1.m = m1;
|
|
|
|
args_1.n = n1;
|
|
|
|
qmckl_dgemm_rec(args_1);
|
|
|
|
|
|
|
|
// TODO: assuming 'N', 'N' here
|
|
|
|
struct dgemm_args args_2 = args;
|
|
|
|
args_2.B = args.B + args.ldb*n1;
|
|
|
|
args_2.C = args.C + args.ldc*n1;
|
|
|
|
args_2.m = m1;
|
|
|
|
args_2.n = n2;
|
|
|
|
qmckl_dgemm_rec(args_2);
|
|
|
|
|
|
|
|
struct dgemm_args args_3 = args;
|
|
|
|
args_3.A = args.A + m1;
|
|
|
|
args_3.C = args.C + m1;
|
|
|
|
args_3.m = m2;
|
|
|
|
args_3.n = n1;
|
|
|
|
qmckl_dgemm_rec(args_3);
|
|
|
|
|
|
|
|
struct dgemm_args args_4 = args;
|
|
|
|
args_4.A = args.A + m1;
|
|
|
|
args_4.B = args.B + args.ldb*n1;
|
|
|
|
args_4.C = args.C + m1 + args.ldc*n1;
|
|
|
|
args_4.m = m2;
|
|
|
|
args_4.n = n2;
|
|
|
|
qmckl_dgemm_rec(args_4);
|
|
|
|
}
|
2021-04-23 11:25:45 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
#+END_SRC
|
|
|
|
|
|
|
|
#+BEGIN_SRC c :noweb yes :tangle qmckl_dgemm.c
|
|
|
|
/* <<header>> */
|
|
|
|
|
|
|
|
#include <cblas.h>
|
|
|
|
|
|
|
|
<<dgemm_args>>
|
|
|
|
|
|
|
|
<<dgemm_rec>>
|
|
|
|
|
|
|
|
<<dgemm>>
|
|
|
|
#+END_SRC
|
|
|
|
|