1
0
mirror of https://github.com/TREX-CoE/irpjast.git synced 2025-01-10 21:18:38 +01:00

DGEMM in C

This commit is contained in:
Anthony Scemama 2021-04-23 11:25:45 +02:00
parent 3c6352730b
commit a3dbb458fe
5 changed files with 219 additions and 6 deletions

View File

@ -1,17 +1,21 @@
IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=4096 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d
#FC = ifort -xCORE-AVX512 -g -mkl=sequential -qopt-zmm-usage=high
FC = ifort -xCORE-AVX2 -g -mkl=sequential
FC = ifort -xCORE-AVX2 -g
FCFLAGS= -O3 -I .
NINJA = ninja
ARCHIVE = ar crs
RANLIB = ranlib
SRC=
OBJ=
LIB=
SRC= IRPF90_temp/qmckl_blas_f.f90 IRPF90_temp/qmckl_dgemm.c
OBJ= IRPF90_temp/qmckl_blas_f.o IRPF90_temp/qmckl_dgemm.o
LIB= -mkl=sequential
-include irpf90.make
export
irpf90.make: qmckl_blas_f.o
irpf90.make: $(filter-out IRPF90_temp/%, $(wildcard */*.irp.f)) $(wildcard *.irp.f) $(wildcard *.inc.f) Makefile
$(IRPF90)
IRPF90_temp/%.o: %.c
$(CC) -c $< -o $@

View File

@ -13,7 +13,7 @@
! r_{ij}^k . R_{ja}^l -> tmp_c_{ia}^{kl}
do k=0,ncord-1
call dgemm('N','N', nelec, nnuc*(ncord+1), nelec, 1.d0, &
call qmckl_dgemm('N','N', nelec, nnuc*(ncord+1), nelec, 1.d0, &
rescale_een_e(1,1,k), size(rescale_een_e,1), &
rescale_een_n(1,1,0), size(rescale_een_n,1), 0.d0, &
tmp_c(1,1,0,k), size(tmp_c,1))
@ -21,7 +21,7 @@
! dr_{ij}^k . R_{ja}^l -> dtmp_c_{ia}^{kl}
do k=0,ncord-1
call dgemm('N','N', 4*nelec_8, nnuc*(ncord+1), nelec, 1.d0, &
call qmckl_dgemm('N','N', 4*nelec_8, nnuc*(ncord+1), nelec, 1.d0, &
rescale_een_e_deriv_e(1,1,1,k), &
size(rescale_een_e_deriv_e,1)*size(rescale_een_e_deriv_e,2), &
rescale_een_n(1,1,0), &

18
qmckl_blas_f.f90 Normal file
View File

@ -0,0 +1,18 @@
! Generated from qmckl_dgemm.org
module qmckl_blas
use :: iso_c_binding
interface
subroutine qmckl_dgemm(transa, transb, m, n, k, &
alpha, A, lda, B, ldb, beta, C, ldc) bind(C)
use :: iso_c_binding
implicit none
character(kind=c_char ), value :: transa, transb
integer (kind=c_int ), value :: m, n, k, lda, ldb, ldc
real (kind=c_double), value :: alpha, beta
real (kind=c_double) :: A(lda,*), B(ldb,*), C(ldc,*)
end subroutine qmckl_dgemm
end interface
end module qmckl_blas

67
qmckl_dgemm.c Normal file
View File

@ -0,0 +1,67 @@
/* Generated from qmckl_dgemm.org */
#include <cblas.h>
struct dgemm_args {
double alpha;
double beta;
double* A;
double* B;
double* C;
int m;
int n;
int k;
int lda;
int ldb;
int ldc;
CBLAS_LAYOUT transa;
CBLAS_LAYOUT transb;
};
static void qmckl_dgemm_rec(struct dgemm_args args) {
cblas_dgemm(CblasColMajor, args.transa, args.transb,
args.m, args.n, args.k, args.alpha,
args.A, args.lda, args.B, args.ldb,
args.beta, args.C, args.ldc);
}
void qmckl_dgemm(char transa, char transb,
int m, int n, int k,
double alpha,
double* A, int lda,
double* B, int ldb,
double beta,
double* C, int ldc)
{
struct dgemm_args args;
args.alpha = alpha;
args.beta = beta ;
args.A = A;
args.B = B;
args.C = C;
args.m = m;
args.n = n;
args.k = k;
args.lda = lda;
args.ldb = ldb;
args.ldc = ldc;
if (transa == 'T' || transa == 't') {
args.transa = CblasTrans;
} else {
args.transa = CblasNoTrans;
}
CBLAS_LAYOUT tb;
if (transa == 'T' || transa == 't') {
args.transb = CblasTrans;
} else {
args.transb = CblasNoTrans;
}
qmckl_dgemm_rec(args);
}

124
qmckl_dgemm.org Normal file
View File

@ -0,0 +1,124 @@
#+TITLE: QMCkl dgemm
#+NAME: header
#+BEGIN_SRC text
Generated from qmckl_dgemm.org
#+END_SRC
* Fortran interface
#+BEGIN_SRC f90 :noweb yes :tangle qmckl_blas_f.f90
! <<header>>
module qmckl_blas
use :: iso_c_binding
interface
subroutine qmckl_dgemm(transa, transb, m, n, k, &
alpha, A, lda, B, ldb, beta, C, ldc) bind(C)
use :: iso_c_binding
implicit none
character(kind=c_char ), value :: transa, transb
integer (kind=c_int ), value :: m, n, k, lda, ldb, ldc
real (kind=c_double), value :: alpha, beta
real (kind=c_double) :: A(lda,*), B(ldb,*), C(ldc,*)
end subroutine qmckl_dgemm
end interface
end module qmckl_blas
#+END_SRC
* C code
To avoid passing too many arguments to recursive subroutines, we put
all the arguments in a struct.
#+NAME: dgemm_args
#+BEGIN_SRC c
struct dgemm_args {
double alpha;
double beta;
double* A;
double* B;
double* C;
int m;
int n;
int k;
int lda;
int ldb;
int ldc;
CBLAS_LAYOUT transa;
CBLAS_LAYOUT transb;
};
#+END_SRC
The driver routine packs the arguments in the struct and calls the
recursive routine.
#+NAME: dgemm
#+BEGIN_SRC c
void qmckl_dgemm(char transa, char transb,
int m, int n, int k,
double alpha,
double* A, int lda,
double* B, int ldb,
double beta,
double* C, int ldc)
{
struct dgemm_args args;
args.alpha = alpha;
args.beta = beta ;
args.A = A;
args.B = B;
args.C = C;
args.m = m;
args.n = n;
args.k = k;
args.lda = lda;
args.ldb = ldb;
args.ldc = ldc;
if (transa == 'T' || transa == 't') {
args.transa = CblasTrans;
} else {
args.transa = CblasNoTrans;
}
CBLAS_LAYOUT tb;
if (transa == 'T' || transa == 't') {
args.transb = CblasTrans;
} else {
args.transb = CblasNoTrans;
}
qmckl_dgemm_rec(args);
}
#+END_SRC
#+NAME: dgemm_rec
#+BEGIN_SRC c
static void qmckl_dgemm_rec(struct dgemm_args args) {
cblas_dgemm(CblasColMajor, args.transa, args.transb,
args.m, args.n, args.k, args.alpha,
args.A, args.lda, args.B, args.ldb,
args.beta, args.C, args.ldc);
}
#+END_SRC
#+BEGIN_SRC c :noweb yes :tangle qmckl_dgemm.c
/* <<header>> */
#include <cblas.h>
<<dgemm_args>>
<<dgemm_rec>>
<<dgemm>>
#+END_SRC