2022-10-01 18:54:18 +02:00
|
|
|
#include <stdint.h>
|
2022-10-17 14:56:32 +02:00
|
|
|
#include "helper.h"
|
2022-07-11 14:48:59 +02:00
|
|
|
#include "cycles.h"
|
|
|
|
|
2022-10-01 18:54:18 +02:00
|
|
|
#define DATASET "dataset"
|
2022-09-09 17:15:12 +02:00
|
|
|
#define REPETITIONS 1
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
uint64_t n_splits;
|
|
|
|
uint64_t block_fail;
|
|
|
|
uint64_t recursive_calls;
|
|
|
|
|
|
|
|
int main(int argc, char **argv) {
|
|
|
|
assert(argc == 2);
|
|
|
|
char *version = argv[1];
|
|
|
|
|
2022-11-08 15:35:25 +01:00
|
|
|
#ifdef USE_OMP_OFFLOAD_CUDA
|
2022-10-17 15:26:30 +02:00
|
|
|
cublasHandle_t handle = init_cublas();
|
|
|
|
cusolverDnHandle_t s_handle = init_cusolver();
|
|
|
|
#endif
|
2022-09-09 17:15:12 +02:00
|
|
|
|
2022-10-17 15:15:16 +02:00
|
|
|
// SETUP DATA ACCESS
|
2022-10-01 18:54:18 +02:00
|
|
|
hid_t file_id = H5Fopen(DATASET, H5F_ACC_RDONLY, H5P_DEFAULT);
|
|
|
|
|
2022-11-08 15:35:25 +01:00
|
|
|
printf("\n# %d REPETITIONS\n", REPETITIONS);
|
2022-10-01 18:54:18 +02:00
|
|
|
printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
|
|
|
printf("#1\t2\t3\t4\t\t5\t6\t\t7\t\t8\t\t9\t\t10\t\t11\t\t12\t\t13\t\t14\n");
|
|
|
|
printf("#CYCLE\tUPDS\tERR_IN\tERR_BREAK\tERR_OUT\tSPLITS\t\tBLK_FAILS\tMAX\t\tFROB\t\tCOND\t\tCPU_CYC\t\tCPU_CYC/UPD\tCUMUL\t\tREC\n");
|
|
|
|
printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
2022-10-17 15:15:16 +02:00
|
|
|
|
2022-07-11 14:48:59 +02:00
|
|
|
// FOR EACH UPDATE CYCLE DO:
|
2022-11-08 15:35:25 +01:00
|
|
|
for (uint32_t cycles_index = 0; cycles_index < n_cycles; cycles_index++) {
|
2022-10-01 18:54:18 +02:00
|
|
|
|
|
|
|
// SETUP TEST PARAMETERS
|
2022-11-08 15:35:25 +01:00
|
|
|
// const uint32_t GHz = 2800000000; // 2.8 giga-cycles per second
|
2022-10-01 18:54:18 +02:00
|
|
|
const double breakdown = 0.001; // default = 0.001. 1e-9 might be too small
|
|
|
|
const double tolerance = 0.001; // default = 0.001
|
|
|
|
double cumulative = 0;
|
|
|
|
|
2022-07-11 14:48:59 +02:00
|
|
|
// 1. READ DATA FROM DATASET
|
|
|
|
uint32_t cycle = cycles[cycles_index];
|
2022-10-01 18:54:18 +02:00
|
|
|
uint64_t Dim = get_dim(cycle, file_id);
|
|
|
|
uint64_t Lds = Dim;
|
|
|
|
uint64_t N_updates = get_nupdates(cycle, file_id);
|
|
|
|
uint64_t* Updates_index = get_upd_idcs(cycle, file_id, N_updates);
|
|
|
|
double* Updates = get_upds(cycle, file_id, N_updates, Lds);
|
|
|
|
double* Slater = get_slater(cycle, file_id, Dim, Lds);
|
|
|
|
double* Slater_invT = get_slater_inv(cycle, file_id, Dim, Lds);
|
2022-10-17 15:15:16 +02:00
|
|
|
double* Slater_invT_copy = calloc(1, sizeof *Slater_invT_copy * Dim * Lds);
|
2022-10-01 18:54:18 +02:00
|
|
|
double determinant = get_determinant(cycle, file_id), determinant_copy;
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// Compute transpose of S. ST: 24 x 21
|
2022-10-01 18:54:18 +02:00
|
|
|
double *SlaterT = calloc(1, sizeof *SlaterT * Dim * Lds);
|
|
|
|
transpose(Slater, Lds, SlaterT, Dim, Dim, Lds);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// Convert repl. upds into additive upds.
|
2022-10-01 18:54:18 +02:00
|
|
|
convert(Updates, N_updates, Updates_index, SlaterT, Dim, Lds);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 2. CHECK ERROR ON THE INPUT DATA AND RECORD RESULT: ERR_INPUT
|
2022-07-22 11:34:29 +02:00
|
|
|
uint32_t err_inp = check_error(Lds, Dim, Slater_invT, SlaterT, tolerance);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// Update Slater matrix
|
2022-10-01 18:54:18 +02:00
|
|
|
update(SlaterT, Updates, Updates_index, N_updates, Dim, Lds);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
int32_t err_break;
|
|
|
|
|
|
|
|
// 3. SET TIME- AND SPLIT ACCUMULATOR TO ZERO
|
|
|
|
double accumulator = 0;
|
|
|
|
double cycles_per_update = 0;
|
|
|
|
n_splits = 0;
|
|
|
|
block_fail = 0;
|
|
|
|
recursive_calls = 0;
|
|
|
|
|
2022-10-17 15:26:30 +02:00
|
|
|
// ## FOR A SET NUMBER OF REPETITIONS EXECUTE A CHOSEN KERNEL
|
2022-07-11 14:48:59 +02:00
|
|
|
for (int rep = 0; rep < REPETITIONS; rep++) {
|
|
|
|
|
|
|
|
// 1. MAKE A FRESH COPY OF THE SLATER INVERSE AND DETERMINANT AND USE THE COPY
|
2022-07-22 11:34:29 +02:00
|
|
|
memcpy(Slater_invT_copy, Slater_invT, Lds * Dim * sizeof(double));
|
2022-07-11 14:48:59 +02:00
|
|
|
determinant_copy = determinant;
|
|
|
|
|
|
|
|
// ### CHOOSE A KERNEL:
|
2022-07-21 12:21:51 +02:00
|
|
|
if (version[0] == 'n') { // Naive
|
2022-10-17 15:15:16 +02:00
|
|
|
|
2022-07-11 14:48:59 +02:00
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-07-22 11:34:29 +02:00
|
|
|
err_break = qmckl_sherman_morrison(Lds, Dim, N_updates, Updates,
|
2022-10-01 18:54:18 +02:00
|
|
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double)(after - before);
|
2022-10-17 15:15:16 +02:00
|
|
|
}
|
|
|
|
else if (version[0] == 'l') { // Later
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-07-22 11:34:29 +02:00
|
|
|
err_break = qmckl_sherman_morrison_later(Lds, Dim, N_updates, Updates,
|
2022-10-01 18:54:18 +02:00
|
|
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double)(after - before);
|
2022-10-17 15:15:16 +02:00
|
|
|
}
|
|
|
|
else if (version[0] == '2') { // by twos
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-07-22 11:34:29 +02:00
|
|
|
err_break = qmckl_woodbury_2(Lds, Dim, Updates, Updates_index,
|
2022-10-01 18:54:18 +02:00
|
|
|
breakdown, Slater_invT_copy, &determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double)(after - before);
|
|
|
|
|
2022-10-17 15:15:16 +02:00
|
|
|
}
|
|
|
|
else if (version[0] == '3') { // by threes
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-07-22 11:34:29 +02:00
|
|
|
err_break = qmckl_woodbury_3(Lds, Dim, Updates, Updates_index,
|
2022-10-01 18:54:18 +02:00
|
|
|
breakdown, Slater_invT_copy, &determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double)(after - before);
|
|
|
|
|
2022-10-17 15:15:16 +02:00
|
|
|
}
|
|
|
|
else if (version[0] == 'k') { // Woodbury K
|
2022-07-20 19:09:55 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-07-22 11:34:29 +02:00
|
|
|
err_break = qmckl_woodbury_k(Lds, Dim, N_updates, Updates,
|
2022-10-01 18:54:18 +02:00
|
|
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
2022-07-20 19:09:55 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
2022-10-17 15:15:16 +02:00
|
|
|
accumulator += (double) (after - before);
|
|
|
|
}
|
2022-11-08 15:35:25 +01:00
|
|
|
#ifdef USE_OMP
|
|
|
|
else if (version[0] == 'o') { // Woodbury K OMP
|
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
|
|
|
err_break = qmckl_woodbury_k_omp(Lds, Dim, N_updates, Updates,
|
|
|
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double) (after - before);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef USE_OMP_OFFLOAD_CUDA
|
2022-10-17 15:15:16 +02:00
|
|
|
else if (version[0] == 'c') { // Woodbury K cuBLAS
|
2022-07-21 12:21:51 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-11-08 15:35:25 +01:00
|
|
|
err_break = qmckl_woodbury_k_ompol_cuda_sync(handle, s_handle, Lds, Dim, N_updates, Updates,
|
|
|
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
2022-07-21 12:21:51 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
2022-10-17 15:15:16 +02:00
|
|
|
accumulator += (double) (after - before);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
else if (version[0] == 's') { // Splitting
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-07-22 11:34:29 +02:00
|
|
|
err_break = qmckl_sherman_morrison_splitting(Lds, Dim, N_updates, Updates,
|
2022-10-01 18:54:18 +02:00
|
|
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double)(after - before);
|
2022-10-17 15:15:16 +02:00
|
|
|
}
|
|
|
|
else if (version[0] == 'b') { // Blocked
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
2022-07-22 11:34:29 +02:00
|
|
|
err_break = qmckl_sherman_morrison_smw32s(Lds, Dim, N_updates, Updates,
|
2022-10-01 18:54:18 +02:00
|
|
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double)(after - before);
|
2022-10-17 15:15:16 +02:00
|
|
|
}
|
|
|
|
else if (version[0] == 'm') { // LAPACK/MKL
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// Only send upper Dim x Dim part of matrix to lapack
|
2022-10-01 18:54:18 +02:00
|
|
|
double *tmp = malloc(sizeof *tmp * Dim * Dim);
|
|
|
|
memcpy(tmp, SlaterT, sizeof *SlaterT * Dim * Dim);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 1. FETCH START TIME
|
|
|
|
uint64_t before = rdtsc();
|
|
|
|
|
|
|
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
|
|
|
err_break = inverse(tmp, Dim, Dim);
|
|
|
|
|
|
|
|
// 3. FETCH FINISH TIME
|
|
|
|
uint64_t after = rdtsc();
|
|
|
|
|
|
|
|
// Copy elements of inverse back, adding 0-padding in "correct" place
|
2022-10-01 18:54:18 +02:00
|
|
|
copy(Slater_invT_copy, Lds, tmp, Dim);
|
|
|
|
free(tmp);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
|
|
|
accumulator += (double)(after - before);
|
|
|
|
|
2022-10-17 15:15:16 +02:00
|
|
|
}
|
|
|
|
else { // Exit
|
2022-07-11 14:48:59 +02:00
|
|
|
printf("Version '%c' not implemented.\n", version[0]);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
} // END OF REPETITIONS LOOP
|
|
|
|
|
2022-10-01 18:54:18 +02:00
|
|
|
// 4. COPY RESULT BACK TO ORIGINAL
|
2022-07-22 11:34:29 +02:00
|
|
|
memcpy(Slater_invT, Slater_invT_copy, Lds * Dim * sizeof(double));
|
2022-07-11 14:48:59 +02:00
|
|
|
determinant = determinant_copy;
|
|
|
|
|
|
|
|
// 5. DIVIDE CYCLE- AND SPLIT-ACCUMULATOR BY NUMBER OF REPETITIONS AND RECORD
|
|
|
|
// DIVIDE CYCLE-ACCUMULATOR BY NUMBER OF UPDATES AND RECORD
|
|
|
|
accumulator /= REPETITIONS;
|
|
|
|
cycles_per_update = accumulator / N_updates;
|
|
|
|
n_splits /= REPETITIONS;
|
|
|
|
block_fail /= REPETITIONS;
|
|
|
|
recursive_calls /= REPETITIONS;
|
|
|
|
|
|
|
|
// 6. ADD THE AVERAGED TIME PER CYCLE OF ACCUMULATER TO
|
|
|
|
// CUMULATIVE RESULT FOR THE ENTIRE DATASET
|
|
|
|
cumulative += accumulator;
|
|
|
|
|
2022-10-01 18:54:18 +02:00
|
|
|
// Compute Slater x Slater_inv
|
|
|
|
double* SSi = malloc(sizeof *SSi * Dim * Dim);
|
|
|
|
double alpha = 1.0, beta = 0.0;
|
|
|
|
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
|
|
|
|
Dim, Dim, Dim,
|
|
|
|
alpha, SlaterT, Dim, Slater_invT, Lds,
|
|
|
|
beta, SSi, Dim);
|
|
|
|
|
|
|
|
// Compute residual matrix S * Sinv - Id
|
|
|
|
double* Res = calloc(1, sizeof *Res * Dim * Dim);
|
2022-07-11 14:48:59 +02:00
|
|
|
residual(SSi, Res, Dim);
|
|
|
|
const double max = max_norm(Res, Dim, Dim);
|
2022-10-01 18:54:18 +02:00
|
|
|
free(SSi);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 7. CHECK ERRROR ON THE UPDATED DATA AND RECORD THE RESULT: ERR_OUT
|
2022-07-22 11:34:29 +02:00
|
|
|
uint32_t err_out = check_error(Lds, Dim, Slater_invT, SlaterT, tolerance);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// if (err_out == 1) printf("cycle index %d: cycle %d with %lu upds failed!\n", cycles_index, cycle, N_updates);
|
|
|
|
|
|
|
|
// 8. COMPUTE CONDITION NUMBER
|
2022-07-22 11:34:29 +02:00
|
|
|
const double condnr = condition_number(Slater, Slater_invT, Lds, Dim);
|
2022-07-11 14:48:59 +02:00
|
|
|
const double frob = frobenius_norm(Res, Dim, Dim);
|
2022-10-01 18:54:18 +02:00
|
|
|
free(Res);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
// 10. WRITE RESULTS TO FILE: CYCLE#, #UPDS, ERR_INP, ERR_BREAK, #SPLITS, ERR_OUT, COND, #CLCK_TCKS
|
2022-11-08 15:35:25 +01:00
|
|
|
printf("%u\t%lu\t%u\t%u\t\t%u\t%lu\t\t%lu\t\t%e\t%e\t%e\t%e\t%e\t%e\t%lu\n", cycle, N_updates, err_inp, err_break, err_out, n_splits, block_fail, max, frob, condnr, accumulator, cycles_per_update, cumulative, recursive_calls);
|
|
|
|
// printf("%u\t%lu\t%u\t%u\t\t%u\t%lu\t\t%lu\t\t%e\t%e\t%e\t%9.6f\t%9.6f\t%9.6f\t%lu\n", cycle, N_updates, err_inp, err_break, err_out, n_splits, block_fail, max, frob, condnr, (double)accumulator/GHz, (double)cycles_per_update/GHz, (double)cumulative/GHz, recursive_calls);
|
2022-10-01 18:54:18 +02:00
|
|
|
|
2022-07-11 14:48:59 +02:00
|
|
|
free(Updates_index);
|
|
|
|
free(Updates);
|
2022-10-01 18:54:18 +02:00
|
|
|
free(SlaterT);
|
|
|
|
free(Slater_invT);
|
|
|
|
free(Slater_invT_copy);
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
} // END OF CYCLE LOOP
|
2022-10-17 15:26:30 +02:00
|
|
|
|
2022-11-08 15:35:25 +01:00
|
|
|
// printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
|
|
|
// printf("#1\t2\t3\t4\t\t5\t6\t\t7\t\t8\t\t9\t\t10\t\t11\t\t12\t\t13\t\t14\n");
|
|
|
|
// printf("#CYCLE\tUPDS\tERR_IN\tERR_BREAK\tERR_OUT\tSPLITS\t\tBLK_FAILS\tMAX\t\tFROB\t\tCOND\t\tCPU_CYC\t\tCPU_CYC/UPD\tCUMUL\t\tREC\n");
|
|
|
|
// printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
2022-07-11 14:48:59 +02:00
|
|
|
|
|
|
|
(void) H5Fclose(file_id);
|
2022-09-09 17:15:12 +02:00
|
|
|
|
2022-11-08 15:35:25 +01:00
|
|
|
#ifdef USE_OMP_OFFLOAD_CUDA
|
2022-10-17 15:26:30 +02:00
|
|
|
cublasDestroy_v2(handle);
|
|
|
|
cusolverDnDestroy(s_handle);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return 0;
|
2022-07-11 14:48:59 +02:00
|
|
|
}
|