From 47823c5bb7039db7c1fd43c485f514dd8a51e92a Mon Sep 17 00:00:00 2001
From: Anthony Scemama
Date: Fri, 23 Apr 2021 14:30:34 +0200
Subject: [PATCH] OpenMP tasks

Run the recursive block decomposition of qmckl_dgemm_rec with OpenMP
tasks: each of the four sub-blocks of C, and each leaf cblas_dgemm call,
is wrapped in a task.  qmckl_dgemm now starts the recursion from a single
thread inside a parallel region.  The Makefile defines CC = gcc -fopenmp
and adds -lgomp to the link line.

---
Review notes (this section is not part of the commit message):

* The `#pragma omp taskwait` that followed the `single` block has been
  replaced by a comment.  A taskwait only waits for the child tasks of the
  task that encounters it, not for their descendants; what actually makes
  the result complete is the implicit barrier that ends `single`, which
  finishes every explicit task of the parallel region.
* No `firstprivate` clause is spelled out on the tasks: `args`, `m1`, `m2`,
  `n1` and `n2` are locals of qmckl_dgemm_rec and are captured by value
  under the default data-sharing rules for orphaned tasks.
* NOTE(review): line breaks, indentation and the position of blank context
  lines were reconstructed from a whitespace-collapsed copy of this patch,
  and the index hashes predate the taskwait edit.  Check the hunks against
  the tree (or regenerate with git format-patch) before applying.

 Makefile        |  3 +-
 qmckl_dgemm.c   | 88 ++++++++++++++++++++++++++++++-------------------
 qmckl_dgemm.org | 88 ++++++++++++++++++++++++++++++-------------------
 3 files changed, 112 insertions(+), 67 deletions(-)

diff --git a/Makefile b/Makefile
index 4231fe4..d65fa91 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
 IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=4096 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d
 #FC = ifort -xCORE-AVX512 -g -mkl=sequential -qopt-zmm-usage=high
 FC = ifort -xCORE-AVX2 -g
+CC = gcc -fopenmp
 FCFLAGS= -O3 -I .
 NINJA = ninja
 ARCHIVE = ar crs
@@ -8,7 +9,7 @@ RANLIB = ranlib
 
 SRC= IRPF90_temp/qmckl_blas_f.f90 IRPF90_temp/qmckl_dgemm.c
 OBJ= IRPF90_temp/qmckl_blas_f.o IRPF90_temp/qmckl_dgemm.o
-LIB= -mkl=sequential
+LIB= -mkl=sequential -lgomp
 
 -include irpf90.make
 export
diff --git a/qmckl_dgemm.c b/qmckl_dgemm.c
index 2935807..6c7bea3 100644
--- a/qmckl_dgemm.c
+++ b/qmckl_dgemm.c
@@ -27,44 +27,59 @@ static void qmckl_dgemm_rec(struct dgemm_args args) {
 
   // printf("%5d %5d\n", args.m, args.n);
   if ( (args.m <= MIN_SIZE) || (args.n <= MIN_SIZE)) {
-    cblas_dgemm(CblasColMajor, args.transa, args.transb,
-                args.m, args.n, args.k, args.alpha,
-                args.A, args.lda, args.B, args.ldb,
-                args.beta, args.C, args.ldc);
+    #pragma omp task
+    {
+      cblas_dgemm(CblasColMajor, args.transa, args.transb,
+                  args.m, args.n, args.k, args.alpha,
+                  args.A, args.lda, args.B, args.ldb,
+                  args.beta, args.C, args.ldc);
+    }
   } else {
 
     int m1 = args.m / 2;
     int m2 = args.m - m1;
     int n1 = args.n / 2;
     int n2 = args.n - n1;
-
-    struct dgemm_args args_1 = args;
-    args_1.m = m1;
-    args_1.n = n1;
-    qmckl_dgemm_rec(args_1);
-
-    // TODO: assuming 'N', 'N' here
-    struct dgemm_args args_2 = args;
-    args_2.B = args.B + args.ldb*n1;
-    args_2.C = args.C + args.ldc*n1;
-    args_2.m = m1;
-    args_2.n = n2;
-    qmckl_dgemm_rec(args_2);
-
-    struct dgemm_args args_3 = args;
-    args_3.A = args.A + m1;
-    args_3.C = args.C + m1;
-    args_3.m = m2;
-    args_3.n = n1;
-    qmckl_dgemm_rec(args_3);
-
-    struct dgemm_args args_4 = args;
-    args_4.A = args.A + m1;
-    args_4.B = args.B + args.ldb*n1;
-    args_4.C = args.C + m1 + args.ldc*n1;
-    args_4.m = m2;
-    args_4.n = n2;
-    qmckl_dgemm_rec(args_4);
+
+    #pragma omp task
+    {
+      struct dgemm_args args_1 = args;
+      args_1.m = m1;
+      args_1.n = n1;
+      qmckl_dgemm_rec(args_1);
+    }
+
+    #pragma omp task
+    {
+      // TODO: assuming 'N', 'N' here
+      struct dgemm_args args_2 = args;
+      args_2.B = args.B + args.ldb*n1;
+      args_2.C = args.C + args.ldc*n1;
+      args_2.m = m1;
+      args_2.n = n2;
+      qmckl_dgemm_rec(args_2);
+    }
+
+    #pragma omp task
+    {
+      struct dgemm_args args_3 = args;
+      args_3.A = args.A + m1;
+      args_3.C = args.C + m1;
+      args_3.m = m2;
+      args_3.n = n1;
+      qmckl_dgemm_rec(args_3);
+    }
+
+    #pragma omp task
+    {
+      struct dgemm_args args_4 = args;
+      args_4.A = args.A + m1;
+      args_4.B = args.B + args.ldb*n1;
+      args_4.C = args.C + m1 + args.ldc*n1;
+      args_4.m = m2;
+      args_4.n = n2;
+      qmckl_dgemm_rec(args_4);
+    }
   }
 }
 
@@ -104,5 +119,12 @@ void qmckl_dgemm(char transa, char transb,
     args.transb = CblasNoTrans;
   }
 
-  qmckl_dgemm_rec(args);
+  #pragma omp parallel
+  {
+    #pragma omp single
+    {
+      qmckl_dgemm_rec(args);
+    }
+    // No taskwait here: the implicit barrier ending 'single' completes all tasks.
+  }
 }
diff --git a/qmckl_dgemm.org b/qmckl_dgemm.org
index ee90d5c..db896a1 100644
--- a/qmckl_dgemm.org
+++ b/qmckl_dgemm.org
@@ -92,7 +92,14 @@ void qmckl_dgemm(char transa, char transb,
     args.transb = CblasNoTrans;
   }
 
-  qmckl_dgemm_rec(args);
+  #pragma omp parallel
+  {
+    #pragma omp single
+    {
+      qmckl_dgemm_rec(args);
+    }
+    // No taskwait here: the implicit barrier ending 'single' completes all tasks.
+  }
 }
 #+END_SRC
 
@@ -108,44 +115,59 @@ static void qmckl_dgemm_rec(struct dgemm_args args) {
 
   // printf("%5d %5d\n", args.m, args.n);
   if ( (args.m <= MIN_SIZE) || (args.n <= MIN_SIZE)) {
-    cblas_dgemm(CblasColMajor, args.transa, args.transb,
-                args.m, args.n, args.k, args.alpha,
-                args.A, args.lda, args.B, args.ldb,
-                args.beta, args.C, args.ldc);
+    #pragma omp task
+    {
+      cblas_dgemm(CblasColMajor, args.transa, args.transb,
+                  args.m, args.n, args.k, args.alpha,
+                  args.A, args.lda, args.B, args.ldb,
+                  args.beta, args.C, args.ldc);
+    }
   } else {
 
     int m1 = args.m / 2;
     int m2 = args.m - m1;
     int n1 = args.n / 2;
     int n2 = args.n - n1;
-
-    struct dgemm_args args_1 = args;
-    args_1.m = m1;
-    args_1.n = n1;
-    qmckl_dgemm_rec(args_1);
-
-    // TODO: assuming 'N', 'N' here
-    struct dgemm_args args_2 = args;
-    args_2.B = args.B + args.ldb*n1;
-    args_2.C = args.C + args.ldc*n1;
-    args_2.m = m1;
-    args_2.n = n2;
-    qmckl_dgemm_rec(args_2);
-
-    struct dgemm_args args_3 = args;
-    args_3.A = args.A + m1;
-    args_3.C = args.C + m1;
-    args_3.m = m2;
-    args_3.n = n1;
-    qmckl_dgemm_rec(args_3);
-
-    struct dgemm_args args_4 = args;
-    args_4.A = args.A + m1;
-    args_4.B = args.B + args.ldb*n1;
-    args_4.C = args.C + m1 + args.ldc*n1;
-    args_4.m = m2;
-    args_4.n = n2;
-    qmckl_dgemm_rec(args_4);
+
+    #pragma omp task
+    {
+      struct dgemm_args args_1 = args;
+      args_1.m = m1;
+      args_1.n = n1;
+      qmckl_dgemm_rec(args_1);
+    }
+
+    #pragma omp task
+    {
+      // TODO: assuming 'N', 'N' here
+      struct dgemm_args args_2 = args;
+      args_2.B = args.B + args.ldb*n1;
+      args_2.C = args.C + args.ldc*n1;
+      args_2.m = m1;
+      args_2.n = n2;
+      qmckl_dgemm_rec(args_2);
+    }
+
+    #pragma omp task
+    {
+      struct dgemm_args args_3 = args;
+      args_3.A = args.A + m1;
+      args_3.C = args.C + m1;
+      args_3.m = m2;
+      args_3.n = n1;
+      qmckl_dgemm_rec(args_3);
+    }
+
+    #pragma omp task
+    {
+      struct dgemm_args args_4 = args;
+      args_4.A = args.A + m1;
+      args_4.B = args.B + args.ldb*n1;
+      args_4.C = args.C + m1 + args.ldc*n1;
+      args_4.m = m2;
+      args_4.n = n2;
+      qmckl_dgemm_rec(args_4);
+    }
   }
 }
 