1
0
mirror of https://github.com/TREX-CoE/irpjast.git synced 2025-01-03 01:56:19 +01:00

Chameleon

This commit is contained in:
Anthony Scemama 2021-06-04 13:45:58 +02:00
parent aabd6cf003
commit 9b3ecb935a
2 changed files with 20 additions and 55 deletions

View File

@ -2,7 +2,7 @@ STARPU=~/StarPU/starpu-1.3.7/starpu-1.3.pc
# NOTE(review): this fragment comes from a rendered diff, so both the old and
# the new form of a changed line appear one after the other (see CC below).
IRPF90 = irpf90/bin/irpf90 --codelet=factor_een:2 --align=64 # -s nelec_8:504 -s nnuc:100 -s ncord:5 #-a -d
# Alternative compiler line targeting AVX-512, kept commented out.
#FC = ifort -mkl=sequential -xCORE-AVX512 -g -qopt-zmm-usage=high
FC = ifort -mkl=sequential -march=core-avx2 -align array64byte -fma -ftz -fomit-frame-pointer
# C compiler; include flags come from pkg-config for the StarPU .pc file.
CC = gcc $(shell pkg-config --cflags $(STARPU) )
# Same variable assigned again, now also asking pkg-config for chameleon.
# If both lines are kept, make uses this later assignment.
CC = gcc $(shell pkg-config --cflags $(STARPU) chameleon)
FCFLAGS= -O3 -I .
NINJA = ninja
ARCHIVE = ar crs
@ -10,7 +10,7 @@ RANLIB = ranlib
# Extra sources and the object files expected under IRPF90_temp/.
SRC= qmckl_blas_f.f90 qmckl_dgemm.c
OBJ= IRPF90_temp/qmckl_blas_f.o IRPF90_temp/qmckl_dgemm.o
# Link flags; library flags come from pkg-config for StarPU and magma.
LIB= -mkl=sequential $(shell pkg-config --libs $(STARPU) magma)
# NOTE(review): LIB is assigned a second time (rendered diff shows both forms);
# this later assignment adds chameleon to the pkg-config query and is the one
# make uses if both lines are kept.
LIB= -mkl=sequential $(shell pkg-config --libs $(STARPU) chameleon magma)
# Leading '-' makes the include non-fatal when irpf90.make does not exist.
-include irpf90.make
# Bare 'export' passes all variables to sub-makes.
export

View File

@ -1,6 +1,7 @@
/* Generated from qmckl_dgemm.org */
#include <starpu.h>
#include <chameleon.h>
#include <stdint.h>
#include <assert.h>
@ -223,64 +224,28 @@ void qmckl_dgemm(char transa, char transb,
/* qmckl_tasks_run: iterates over the `ngemms` entries of `gemms` and issues
 * one matrix-product call per entry.
 *
 * NOTE(review): this span is a rendered diff hunk, not a compilable function.
 * Lines from the StarPU-based version and from the Chameleon-based version are
 * interleaved without +/- markers: `rc` is declared twice, and an opening
 * starpu_matrix_data_register( call is never closed before CHAMELEON_dgemm(
 * starts. Recover the intended version from the repository before editing.
 */
void qmckl_tasks_run(struct dgemm_args** gemms, int ngemms)
{
/* StarPU runtime start-up; rc is checked by the assert further down. */
int rc = starpu_init(NULL);
starpu_cublas_init();
int NCPU, NGPU;
/* Worker counts are parsed from the STARPU_NCPU / STARPU_NCUDA environment
 * variables.
 * NOTE(review): getenv() returns NULL for an unset variable and that NULL is
 * passed straight to sscanf; the sscanf results are not checked either, so
 * NCPU/NGPU may stay uninitialized -- confirm both variables are always set. */
sscanf( getenv( "STARPU_NCPU" ), "%d", &NCPU );
sscanf( getenv( "STARPU_NCUDA" ), "%d", &NGPU );
assert (rc == 0);
/* Three data handles per gemm entry (slots 0, 1, 2 are registered with the
 * A, B and C pointers respectively below). */
starpu_data_handle_t matrix_handle[ngemms][3];
/* NOTE(review): second declaration of `rc` in the same scope; presumably only
 * one of the two initialisation sequences belongs to the current file. The
 * value returned here is not checked in the visible lines. */
int rc = CHAMELEON_Init(NCPU, NGPU);
for (int i=0 ; i<ngemms ; ++i)
{
/* NOTE(review): this register call is left unterminated; the CHAMELEON_dgemm
 * call begins in the middle of its argument list (diff interleaving). */
starpu_matrix_data_register(&(matrix_handle[i][0]),
STARPU_MAIN_RAM,
(uintptr_t) gemms[i]->A,
/* Both operands are passed with ChamNoTrans. */
CHAMELEON_dgemm(ChamNoTrans, ChamNoTrans,
gemms[i]->m,
gemms[i]->n,
gemms[i]->k,
gemms[i]->alpha,
gemms[i]->A,
gemms[i]->lda,
gemms[i]->m,
gemms[i]->k,
sizeof(double));
starpu_matrix_data_register(&(matrix_handle[i][1]),
STARPU_MAIN_RAM,
(uintptr_t) gemms[i]->B,
gemms[i]->B,
gemms[i]->ldb,
gemms[i]->k,
gemms[i]->n,
sizeof(double));
starpu_matrix_data_register(&(matrix_handle[i][2]),
STARPU_MAIN_RAM,
(uintptr_t) gemms[i]->C,
gemms[i]->ldc,
gemms[i]->m,
gemms[i]->n,
sizeof(double));
/* One StarPU task per entry: the dgemm_cl codelet receives the entry itself
 * as cl_arg and the three registered handles. */
struct starpu_task *task = starpu_task_create();
task->cl = &dgemm_cl;
task->cl_arg = gemms[i];
task->cl_arg_size = sizeof(*gemms[0]);
task->handles[0] = matrix_handle[i][0];
task->handles[1] = matrix_handle[i][1];
task->handles[2] = matrix_handle[i][2];
rc = starpu_task_submit(task);
assert (rc == 0);
gemms[i]->beta,
gemms[i]->C,
gemms[i]->ldc);
}
/* Block until every submitted task has completed. */
starpu_task_wait_for_all();
CHAMELEON_Finalize();
/* Release the handles registered in the first loop. */
for (int i=0 ; i<ngemms ; ++i)
{
starpu_data_unregister(matrix_handle[i][0]);
starpu_data_unregister(matrix_handle[i][1]);
starpu_data_unregister(matrix_handle[i][2]);
}
starpu_shutdown();
}
/* Allocate room for `size` doubles with starpu_malloc and store the address
 * in *ptr.
 *
 * ptr  : output; receives the pointer produced by starpu_malloc.
 * size : number of double elements requested (a count, not a byte size).
 *
 * The requested count is echoed on stdout. A negative count, a count whose
 * byte size does not fit in size_t, or a non-zero return code from
 * starpu_malloc trips an assert, in line with the assert-based error checks
 * used elsewhere in this file.
 */
void alloc(void** ptr, int64_t size) {
  /* int64_t need not be `long`; convert explicitly so the argument matches
   * the conversion specifier on every platform. Output text is unchanged. */
  printf("size: %lld\n", (long long) size);

  /* Reject values that would wrap when cast to size_t and multiplied. */
  assert(size >= 0 && (uint64_t) size <= SIZE_MAX / sizeof(double));

  int rc = starpu_malloc(ptr, (size_t) size * sizeof(double));
  assert(rc == 0);
  (void) rc;  /* keeps NDEBUG builds free of unused-variable warnings */
}