diff --git a/Makefile b/Makefile index 8911dc1..2c21af6 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ STARPU=~/StarPU/starpu-1.3.7/starpu-1.3.pc IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=64 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d #FC = ifort -mkl=sequential -xCORE-AVX512 -g -qopt-zmm-usage=high FC = ifort -mkl=sequential -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer -CC = gcc $(shell pkg-config --cflags $(STARPU) ) +CC = gcc $(shell pkg-config --cflags $(STARPU) chameleon) FCFLAGS= -O3 -I . NINJA = ninja ARCHIVE = ar crs @@ -10,7 +10,7 @@ RANLIB = ranlib SRC= qmckl_blas_f.f90 qmckl_dgemm.c OBJ= IRPF90_temp/qmckl_blas_f.o IRPF90_temp/qmckl_dgemm.o -LIB= -mkl=sequential $(shell pkg-config --libs $(STARPU) magma) +LIB= -mkl=sequential $(shell pkg-config --libs $(STARPU) chameleon magma) -include irpf90.make export diff --git a/qmckl_dgemm.c b/qmckl_dgemm.c index ae86207..5a2ff40 100644 --- a/qmckl_dgemm.c +++ b/qmckl_dgemm.c @@ -1,6 +1,7 @@ /* Generated from qmckl_dgemm.org */ #include +#include #include #include @@ -223,64 +224,28 @@ void qmckl_dgemm(char transa, char transb, void qmckl_tasks_run(struct dgemm_args** gemms, int ngemms) { - int rc = starpu_init(NULL); - starpu_cublas_init(); + int NCPU, NGPU; + sscanf( getenv( "STARPU_NCPU" ), "%d", &NCPU ); + sscanf( getenv( "STARPU_NCUDA" ), "%d", &NGPU ); - assert (rc == 0); - - starpu_data_handle_t matrix_handle[ngemms][3]; + int rc = CHAMELEON_Init(NCPU, NGPU); for (int i=0 ; iA, - gemms[i]->lda, - gemms[i]->m, - gemms[i]->k, - sizeof(double)); - - starpu_matrix_data_register(&(matrix_handle[i][1]), - STARPU_MAIN_RAM, - (uintptr_t) gemms[i]->B, - gemms[i]->ldb, - gemms[i]->k, - gemms[i]->n, - sizeof(double)); - - starpu_matrix_data_register(&(matrix_handle[i][2]), - STARPU_MAIN_RAM, - (uintptr_t) gemms[i]->C, - gemms[i]->ldc, - gemms[i]->m, - gemms[i]->n, - sizeof(double)); - - struct starpu_task *task = starpu_task_create(); - - task->cl = &dgemm_cl; - task->cl_arg = gemms[i]; - task->cl_arg_size = sizeof(*gemms[0]); - task->handles[0] = matrix_handle[i][0]; - task->handles[1] = matrix_handle[i][1]; - task->handles[2] = matrix_handle[i][2]; - rc = starpu_task_submit(task); - assert (rc == 0); + CHAMELEON_dgemm(ChamNoTrans, ChamNoTrans, + gemms[i]->m, + gemms[i]->n, + gemms[i]->k, + gemms[i]->alpha, + gemms[i]->A, + gemms[i]->lda, + gemms[i]->B, + gemms[i]->ldb, + gemms[i]->beta, + gemms[i]->C, + gemms[i]->ldc); } - starpu_task_wait_for_all(); + CHAMELEON_Finalize(); - for (int i=0 ; i