diff --git a/Makefile b/Makefile index 2962eb7..ca6b8a2 100644 --- a/Makefile +++ b/Makefile @@ -1,17 +1,21 @@ IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=4096 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d #FC = ifort -xCORE-AVX512 -g -mkl=sequential -qopt-zmm-usage=high -FC = ifort -xCORE-AVX2 -g -mkl=sequential +FC = ifort -xCORE-AVX2 -g FCFLAGS= -O3 -I . NINJA = ninja ARCHIVE = ar crs RANLIB = ranlib -SRC= -OBJ= -LIB= +SRC= IRPF90_temp/qmckl_blas_f.f90 IRPF90_temp/qmckl_dgemm.c +OBJ= IRPF90_temp/qmckl_blas_f.o IRPF90_temp/qmckl_dgemm.o +LIB= -mkl=sequential -include irpf90.make export +irpf90.make: qmckl_blas_f.o irpf90.make: $(filter-out IRPF90_temp/%, $(wildcard */*.irp.f)) $(wildcard *.irp.f) $(wildcard *.inc.f) Makefile $(IRPF90) + +IRPF90_temp/%.o: %.c + $(CC) -c $< -o $@ diff --git a/el_nuc_el_blas.irp.f b/el_nuc_el_blas.irp.f index 3a39825..482e8cb 100644 --- a/el_nuc_el_blas.irp.f +++ b/el_nuc_el_blas.irp.f @@ -13,7 +13,7 @@ ! r_{ij}^k . R_{ja}^l -> tmp_c_{ia}^{kl} do k=0,ncord-1 - call dgemm('N','N', nelec, nnuc*(ncord+1), nelec, 1.d0, & + call qmckl_dgemm('N','N', nelec, nnuc*(ncord+1), nelec, 1.d0, & rescale_een_e(1,1,k), size(rescale_een_e,1), & rescale_een_n(1,1,0), size(rescale_een_n,1), 0.d0, & tmp_c(1,1,0,k), size(tmp_c,1)) @@ -21,7 +21,7 @@ ! dr_{ij}^k . R_{ja}^l -> dtmp_c_{ia}^{kl} do k=0,ncord-1 - call dgemm('N','N', 4*nelec_8, nnuc*(ncord+1), nelec, 1.d0, & + call qmckl_dgemm('N','N', 4*nelec_8, nnuc*(ncord+1), nelec, 1.d0, & rescale_een_e_deriv_e(1,1,1,k), & size(rescale_een_e_deriv_e,1)*size(rescale_een_e_deriv_e,2), & rescale_een_n(1,1,0), & diff --git a/qmckl_blas_f.f90 b/qmckl_blas_f.f90 new file mode 100644 index 0000000..621bc89 --- /dev/null +++ b/qmckl_blas_f.f90 @@ -0,0 +1,18 @@ +! Generated from qmckl_dgemm.org + +module qmckl_blas + use :: iso_c_binding + + interface + subroutine qmckl_dgemm(transa, transb, m, n, k, & + alpha, A, lda, B, ldb, beta, C, ldc) bind(C) + use :: iso_c_binding + implicit none + character(kind=c_char ), value :: transa, transb + integer (kind=c_int ), value :: m, n, k, lda, ldb, ldc + real (kind=c_double), value :: alpha, beta + real (kind=c_double) :: A(lda,*), B(ldb,*), C(ldc,*) + end subroutine qmckl_dgemm + end interface + +end module qmckl_blas diff --git a/qmckl_dgemm.c b/qmckl_dgemm.c new file mode 100644 index 0000000..2fdf25c --- /dev/null +++ b/qmckl_dgemm.c @@ -0,0 +1,67 @@ +/* Generated from qmckl_dgemm.org */ + +#include + +struct dgemm_args { + double alpha; + double beta; + double* A; + double* B; + double* C; + int m; + int n; + int k; + int lda; + int ldb; + int ldc; + CBLAS_LAYOUT transa; + CBLAS_LAYOUT transb; +}; + + +static void qmckl_dgemm_rec(struct dgemm_args args) { + + cblas_dgemm(CblasColMajor, args.transa, args.transb, + args.m, args.n, args.k, args.alpha, + args.A, args.lda, args.B, args.ldb, + args.beta, args.C, args.ldc); + +} + +void qmckl_dgemm(char transa, char transb, + int m, int n, int k, + double alpha, + double* A, int lda, + double* B, int ldb, + double beta, + double* C, int ldc) +{ + struct dgemm_args args; + + args.alpha = alpha; + args.beta = beta ; + args.A = A; + args.B = B; + args.C = C; + args.m = m; + args.n = n; + args.k = k; + args.lda = lda; + args.ldb = ldb; + args.ldc = ldc; + + if (transa == 'T' || transa == 't') { + args.transa = CblasTrans; + } else { + args.transa = CblasNoTrans; + } + + CBLAS_LAYOUT tb; + if (transa == 'T' || transa == 't') { + args.transb = CblasTrans; + } else { + args.transb = CblasNoTrans; + } + + qmckl_dgemm_rec(args); +} diff --git a/qmckl_dgemm.org b/qmckl_dgemm.org new file mode 100644 index 0000000..5f53d81 --- /dev/null +++ b/qmckl_dgemm.org @@ -0,0 +1,124 @@ +#+TITLE: QMCkl dgemm + +#+NAME: header +#+BEGIN_SRC text +Generated from qmckl_dgemm.org +#+END_SRC + +* Fortran interface + +#+BEGIN_SRC f90 :noweb yes :tangle qmckl_blas_f.f90 +! <
> + +module qmckl_blas + use :: iso_c_binding + + interface + subroutine qmckl_dgemm(transa, transb, m, n, k, & + alpha, A, lda, B, ldb, beta, C, ldc) bind(C) + use :: iso_c_binding + implicit none + character(kind=c_char ), value :: transa, transb + integer (kind=c_int ), value :: m, n, k, lda, ldb, ldc + real (kind=c_double), value :: alpha, beta + real (kind=c_double) :: A(lda,*), B(ldb,*), C(ldc,*) + end subroutine qmckl_dgemm + end interface + +end module qmckl_blas +#+END_SRC + +* C code + To avoid passing too many arguments to recursive subroutines, we put + all the arguments in a struct. + + #+NAME: dgemm_args + #+BEGIN_SRC c +struct dgemm_args { + double alpha; + double beta; + double* A; + double* B; + double* C; + int m; + int n; + int k; + int lda; + int ldb; + int ldc; + CBLAS_LAYOUT transa; + CBLAS_LAYOUT transb; +}; + + #+END_SRC + + The driver routine packs the arguments in the struct and calls the + recursive routine. + + #+NAME: dgemm + #+BEGIN_SRC c +void qmckl_dgemm(char transa, char transb, + int m, int n, int k, + double alpha, + double* A, int lda, + double* B, int ldb, + double beta, + double* C, int ldc) +{ + struct dgemm_args args; + + args.alpha = alpha; + args.beta = beta ; + args.A = A; + args.B = B; + args.C = C; + args.m = m; + args.n = n; + args.k = k; + args.lda = lda; + args.ldb = ldb; + args.ldc = ldc; + + if (transa == 'T' || transa == 't') { + args.transa = CblasTrans; + } else { + args.transa = CblasNoTrans; + } + + CBLAS_LAYOUT tb; + if (transa == 'T' || transa == 't') { + args.transb = CblasTrans; + } else { + args.transb = CblasNoTrans; + } + + qmckl_dgemm_rec(args); +} + + #+END_SRC + + + #+NAME: dgemm_rec + #+BEGIN_SRC c +static void qmckl_dgemm_rec(struct dgemm_args args) { + + cblas_dgemm(CblasColMajor, args.transa, args.transb, + args.m, args.n, args.k, args.alpha, + args.A, args.lda, args.B, args.ldb, + args.beta, args.C, args.ldc); + +} + #+END_SRC + + #+BEGIN_SRC c :noweb yes :tangle qmckl_dgemm.c +/* <
> */ + +#include + +<> + +<> + +<> + #+END_SRC +