mirror of
https://github.com/TREX-CoE/Sherman-Morrison.git
synced 2024-12-24 13:23:45 +01:00
Added independent test harness, written in C. It has its own Makefile and datasets. It is completely independent of the main tree.
This commit is contained in:
parent
8bab304cb5
commit
732045284a
43
independent_test_harness/Makefile
Normal file
43
independent_test_harness/Makefile
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# Build file for the independent Sherman-Morrison test harness.
#
# Targets:
#   test        build the ./test executable (default)
#   clean       remove objects, Fortran *genmod* files and the executable
#   debug_n_2   run ./test under gdb with arguments "n 2"
#   debug_a_2   run ./test under gdb with arguments "a 2"
#
# Exactly one toolchain (FC/CC/FFLAGS/CFLAGS) is active at a time; the
# alternatives are kept commented out so they can be switched quickly.

# --- GNU toolchain -----------------------------------------------------------
# FC = gfortran
# CC = gcc
# FFLAGS=-O0 -finline -g -lm -Wall -pedantic
# CFLAGS=-std=c99 -O0 -finline -g -lm -Wall -pedantic

# --- Intel toolchain (active) ------------------------------------------------
FC = ifort
CC = icc
# FFLAGS=-O0 -warn all -g -pedantic
# CFLAGS=-std=c99 -O0 -Wall -g -pedantic
FFLAGS=-O3 -warn all -ip -finline -ftz -xCORE-AVX2 -g
CFLAGS=-std=c99 -O3 -Wall -ip -finline -ftz -xCORE-AVX2 -g
INCLUDE=-I/usr/include/hdf5/serial
LFLAGS=-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5 -lhdf5_hl -qmkl=sequential

# --- Verificarlo toolchain ---------------------------------------------------
#FC = verificarlo-f
#CC = verificarlo-c
#FFLAGS=-O3 -finline -g
#CFLAGS=-O3 -finline -g

# None of these targets produces a file with its own name.
.PHONY: clean debug_n_2 debug_a_2

# Libraries are listed AFTER the object files: the linker resolves symbols
# left to right, so a library named before the objects that need it may be
# skipped (static archives, or shared libraries with --as-needed).

## Link with icc
test: sm.o test.o detupdate21.o meuk.o
	$(CC) -o test sm.o detupdate21.o test.o meuk.o $(LFLAGS)

## Link with ifort
# test: sm.o test.o detupdate21.o meuk.o
# 	$(FC) -nofor-main -o test sm.o detupdate21.o test.o meuk.o $(LFLAGS)

## Link with gfortran
# test: sm.o test.o detupdate21.o meuk.o
# 	$(FC) -Wno-main -o test sm.o detupdate21.o test.o meuk.o $(LFLAGS)

# Compile Fortran sources.
%.o: %.f90
	$(FC) $(FFLAGS) -c -o $@ $<

# Compile C sources (HDF5 headers come from $(INCLUDE)).
%.o : %.c
	$(CC) $(CFLAGS) $(INCLUDE) -c -o $@ $<

clean:
	rm -rf *.o *genmod* test

debug_n_2: test
	gdb --silent --args ./test n 2

debug_a_2: test
	gdb --silent --args ./test a 2
|
33
independent_test_harness/Test_method.md
Normal file
33
independent_test_harness/Test_method.md
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
Test method
|
||||||
|
===========
|
||||||
|
|
||||||
|
# > for each update cycle do:
|
||||||
|
(# of updates changes -> update indices & size of update-matrix changes)
|
||||||
|
|
||||||
|
1. read data from dataset
|
||||||
|
2. check error on the input data and record result: ERR_INPUT
|
||||||
|
3. set cycle- and split-accumulator to zero
|
||||||
|
|
||||||
|
## >> for a set number of repetitions do:
|
||||||
|
|
||||||
|
1. take a fresh copy (memcpy) of the slater inverse and use it in chosen kernel
|
||||||
|
|
||||||
|
### >>> for the chosen kernel do:
|
||||||
|
|
||||||
|
1. fetch start cycles
|
||||||
|
2. execute kernel and remember exit status: ERR_BREAK
|
||||||
|
(number of splits is recorded in global variable)
|
||||||
|
3. fetch finish cycles
|
||||||
|
4. add cycle difference to time accumulator
|
||||||
|
|
||||||
|
## > continue: for each update cycle do
|
||||||
|
|
||||||
|
4. copy the updated slater-inverse-copy back to original
|
||||||
|
5a. divide cycle- and split-accumulator by number of repetitions
|
||||||
|
5b. divide cycle-accumulator by number of updates
|
||||||
|
6. add the averaged time/update-cycle of accumulator to cumulative-
|
||||||
|
result for the entire dataset
|
||||||
|
7. update the slater matrix
|
||||||
|
8. check the error on the updated data and record the result: ERR_OUT
|
||||||
|
9. write results to stdout: cycle#, #upds, err_inp, err_break,
|
||||||
|
#splits, err_out, #clck_tcks, #clck_tcks/upd, cumulative cycles
|
27
independent_test_harness/condition_numbers.py
Executable file
27
independent_test_harness/condition_numbers.py
Executable file
@ -0,0 +1,27 @@
|
|||||||
|
import numpy as np
|
||||||
|
import h5py
|
||||||
|
|
||||||
|
# Size of the square Slater block. The stored arrays are sliced down to this
# size below; presumably the remaining rows/columns are zero padding (the
# dataset name says "zeropadded") -- confirm against the dataset layout.
DIM = 21

# Prefix stripped from each top-level key to print the bare cycle number.
# Assumes keys look like "cycle_<n>" -- confirm against the dataset.
CYCLE_PREFIX = 'cycle_'

# For every cycle in the dataset print: the determinant of the Slater matrix,
# the 2-norm and Frobenius condition numbers of the stored inverse, and the
# 2-norm and Frobenius norm of the residual (Slater * inverse - identity).
# The context manager closes the file even if a cycle is malformed and raises.
with h5py.File('dataset_zeropadded_cm.hdf5', 'r') as h5file:
    print("#cycle, det, cond_2, cond_f, norm_2, norm_f")
    print("# 1, 2, 3, 4, 5, 6")
    for key in h5file.keys():
        cycle = h5file[key]

        # Index access raises KeyError for a missing dataset instead of
        # silently turning None into an unusable 0-d array.
        slater_matrix = np.array(cycle['slater_matrix'])
        # The inverse is stored transposed; undo that before cropping.
        slater_inverse = np.array(cycle['slater_inverse_t']).transpose()

        slater_matrix_sq = slater_matrix[:, 0:DIM]
        slater_inverse_sq = slater_inverse[0:DIM, :]

        det = np.linalg.det(slater_matrix_sq)
        cond_2 = np.linalg.cond(slater_inverse_sq, p=2)
        cond_f = np.linalg.cond(slater_inverse_sq, p='fro')

        # Residual of the stored inverse: how far S * S^-1 is from identity.
        Id_appr = np.matmul(slater_matrix_sq, slater_inverse_sq)
        Err = Id_appr - np.identity(Id_appr.shape[0])
        normf = np.linalg.norm(Err, ord='fro')
        norm2 = np.linalg.norm(Err, ord=2)

        print(f"{key[len(CYCLE_PREFIX):]}, {det}, {cond_2}, {cond_f}, {norm2}, {normf}")
|
1
independent_test_harness/cycles.h
Symbolic link
1
independent_test_harness/cycles.h
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
cycles_329_dets/all_cycles.h
|
2
independent_test_harness/cycles_15784_dets/11_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/11_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/12_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/12_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/15_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/15_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/1_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/1_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/2_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/2_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/3_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/3_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/4_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/4_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/5_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/5_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/9_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/9_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_15784_dets/all_cycles.h
Normal file
2
independent_test_harness/cycles_15784_dets/all_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_329_dets/11_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/11_cycles.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
const uint32_t n_cycles = 286;
|
||||||
|
uint32_t cycles[n_cycles] = {26,27,28,29,31,32,76,301,325,355,356,357,358,360,361,405,630,654,684,685,686,687,689,690,734,959,983,1013,1014,1015,1016,1018,1019,1063,1288,1312,1342,1343,1344,1345,1347,1348,1392,1617,1641,1671,1672,1673,1674,1676,1677,1721,1946,1970,2000,2001,2002,2003,2005,2006,2050,2275,2299,2329,2330,2331,2332,2334,2335,2379,2604,2628,2658,2659,2660,2661,2663,2664,2708,2933,2957,2987,2988,2989,2990,2992,2993,3037,3262,3286,3316,3317,3318,3319,3321,3322,3366,3591,3615,3645,3646,3647,3648,3650,3651,3695,3920,3944,3974,3975,3976,3977,3979,3980,4024,4249,4273,4303,4304,4305,4306,4308,4309,4353,4578,4602,4632,4633,4634,4635,4637,4638,4682,4907,4931,4961,4962,4963,4964,4966,4967,5011,5236,5260,5290,5291,5292,5293,5295,5296,5340,5565,5589,5619,5620,5621,5622,5624,5625,5669,5894,5918,5948,5949,5950,5951,5953,5954,5998,6223,6247,6277,6278,6279,6280,6282,6283,6327,6552,6576,6606,6607,6608,6609,6611,6612,6656,6881,6905,6935,6936,6937,6938,6940,6941,6985,7210,7234,7264,7265,7266,7267,7269,7270,7314,7539,7563,7593,7594,7595,7596,7598,7599,7643,7868,7892,7922,7923,7924,7925,7927,7928,7972,8197,8221,8251,8252,8253,8254,8256,8257,8301,8526,8550,8580,8581,8582,8583,8585,8586,8630,8855,8879,8909,8910,8911,8912,8914,8915,8959,9184,9208,9238,9239,9240,9241,9243,9244,9288,9513,9537,9567,9568,9569,9570,9572,9573,9617,9842,9866,9896,9897,9898,9899,9901,9902,9946,10171,10195,10225,10226,10227,10228,10230,10231,10275};
|
2
independent_test_harness/cycles_329_dets/12_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/12_cycles.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
const uint32_t n_cycles = 288;
|
||||||
|
uint32_t cycles[n_cycles] = {22,44,45,46,47,50,51,63,184,351,373,374,375,376,379,380,392,513,680,702,703,704,705,708,709,721,842,1009,1031,1032,1033,1034,1037,1038,1050,1171,1338,1360,1361,1362,1363,1366,1367,1379,1500,1667,1689,1690,1691,1692,1695,1696,1708,1829,1996,2018,2019,2020,2021,2024,2025,2037,2158,2325,2347,2348,2349,2350,2353,2354,2366,2487,2654,2676,2677,2678,2679,2682,2683,2695,2816,2983,3005,3006,3007,3008,3011,3012,3024,3145,3312,3334,3335,3336,3337,3340,3341,3353,3474,3641,3663,3664,3665,3666,3669,3670,3682,3803,3970,3992,3993,3994,3995,3998,3999,4011,4132,4299,4321,4322,4323,4324,4327,4328,4340,4461,4628,4650,4651,4652,4653,4656,4657,4669,4790,4957,4979,4980,4981,4982,4985,4986,4998,5119,5286,5308,5309,5310,5311,5314,5315,5327,5448,5615,5637,5638,5639,5640,5643,5644,5656,5777,5944,5966,5967,5968,5969,5972,5973,5985,6106,6273,6295,6296,6297,6298,6301,6302,6314,6435,6602,6624,6625,6626,6627,6630,6631,6643,6764,6931,6953,6954,6955,6956,6959,6960,6972,7093,7260,7282,7283,7284,7285,7288,7289,7301,7422,7589,7611,7612,7613,7614,7617,7618,7630,7751,7918,7940,7941,7942,7943,7946,7947,7959,8080,8247,8269,8270,8271,8272,8275,8276,8288,8409,8576,8598,8599,8600,8601,8604,8605,8617,8738,8905,8927,8928,8929,8930,8933,8934,8946,9067,9234,9256,9257,9258,9259,9262,9263,9275,9396,9563,9585,9586,9587,9588,9591,9592,9604,9725,9892,9914,9915,9916,9917,9920,9921,9933,10054,10221,10243,10244,10245,10246,10249,10250,10262,10383};
|
2
independent_test_harness/cycles_329_dets/15_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/15_cycles.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
/*
 * List of selected update-cycle numbers and its length.
 * Presumably generated by gen_list_update_cycles.sh with NU=15 (the script
 * writes files of exactly this form) -- regenerate rather than edit by hand.
 *
 * The array is sized with a literal: a `const` object is not a constant
 * expression in C, so `cycles[n_cycles]` would be a variably modified type,
 * which is not allowed for an initialized file-scope array.
 */
const uint32_t n_cycles = 64;
uint32_t cycles[64] = {41,170,370,499,699,828,1028,1157,1357,1486,1686,1815,2015,2144,2344,2473,2673,2802,3002,3131,3331,3460,3660,3789,3989,4118,4318,4447,4647,4776,4976,5105,5305,5434,5634,5763,5963,6092,6292,6421,6621,6750,6950,7079,7279,7408,7608,7737,7937,8066,8266,8395,8595,8724,8924,9053,9253,9382,9582,9711,9911,10040,10240,10369};
|
2
independent_test_harness/cycles_329_dets/1_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/1_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_329_dets/2_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/2_cycles.h
Normal file
File diff suppressed because one or more lines are too long
2
independent_test_harness/cycles_329_dets/3_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/3_cycles.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
const uint32_t n_cycles = 978;
|
||||||
|
uint32_t cycles[n_cycles] = {14,60,65,67,80,81,89,92,139,153,158,172,173,178,179,180,215,223,238,263,268,277,281,284,287,293,298,306,307,318,319,343,389,394,396,409,410,418,421,468,482,487,501,502,507,508,509,544,552,567,592,597,606,610,613,616,622,627,635,636,647,648,672,718,723,725,738,739,747,750,797,811,816,830,831,836,837,838,873,881,896,921,926,935,939,942,945,951,956,964,965,976,977,1001,1047,1052,1054,1067,1068,1076,1079,1126,1140,1145,1159,1160,1165,1166,1167,1202,1210,1225,1250,1255,1264,1268,1271,1274,1280,1285,1293,1294,1305,1306,1330,1376,1381,1383,1396,1397,1405,1408,1455,1469,1474,1488,1489,1494,1495,1496,1531,1539,1554,1579,1584,1593,1597,1600,1603,1609,1614,1622,1623,1634,1635,1659,1705,1710,1712,1725,1726,1734,1737,1784,1798,1803,1817,1818,1823,1824,1825,1860,1868,1883,1908,1913,1922,1926,1929,1932,1938,1943,1951,1952,1963,1964,1988,2034,2039,2041,2054,2055,2063,2066,2113,2127,2132,2146,2147,2152,2153,2154,2189,2197,2212,2237,2242,2251,2255,2258,2261,2267,2272,2280,2281,2292,2293,2317,2363,2368,2370,2383,2384,2392,2395,2442,2456,2461,2475,2476,2481,2482,2483,2518,2526,2541,2566,2571,2580,2584,2587,2590,2596,2601,2609,2610,2621,2622,2646,2692,2697,2699,2712,2713,2721,2724,2771,2785,2790,2804,2805,2810,2811,2812,2847,2855,2870,2895,2900,2909,2913,2916,2919,2925,2930,2938,2939,2950,2951,2975,3021,3026,3028,3041,3042,3050,3053,3100,3114,3119,3133,3134,3139,3140,3141,3176,3184,3199,3224,3229,3238,3242,3245,3248,3254,3259,3267,3268,3279,3280,3304,3350,3355,3357,3370,3371,3379,3382,3429,3443,3448,3462,3463,3468,3469,3470,3505,3513,3528,3553,3558,3567,3571,3574,3577,3583,3588,3596,3597,3608,3609,3633,3679,3684,3686,3699,3700,3708,3711,3758,3772,3777,3791,3792,3797,3798,3799,3834,3842,3857,3882,3887,3896,3900,3903,3906,3912,3917,3925,3926,3937,3938,3962,4008,4013,4015,4028,4029,4037,4040,4087,4101,4106,4120,4121,4126,4127,4128,4163,4171,4186,4211,4216,4225,4229,4232,4235,4241,4246,4254,4255,4266,4267,4291,4337,4342,4344,4357,4358,4366,4369,4416,4430,4435,44
49,4450,4455,4456,4457,4492,4500,4515,4540,4545,4554,4558,4561,4564,4570,4575,4583,4584,4595,4596,4620,4666,4671,4673,4686,4687,4695,4698,4745,4759,4764,4778,4779,4784,4785,4786,4821,4829,4844,4869,4874,4883,4887,4890,4893,4899,4904,4912,4913,4924,4925,4949,4995,5000,5002,5015,5016,5024,5027,5074,5088,5093,5107,5108,5113,5114,5115,5150,5158,5173,5198,5203,5212,5216,5219,5222,5228,5233,5241,5242,5253,5254,5278,5324,5329,5331,5344,5345,5353,5356,5403,5417,5422,5436,5437,5442,5443,5444,5479,5487,5502,5527,5532,5541,5545,5548,5551,5557,5562,5570,5571,5582,5583,5607,5653,5658,5660,5673,5674,5682,5685,5732,5746,5751,5765,5766,5771,5772,5773,5808,5816,5831,5856,5861,5870,5874,5877,5880,5886,5891,5899,5900,5911,5912,5936,5982,5987,5989,6002,6003,6011,6014,6061,6075,6080,6094,6095,6100,6101,6102,6137,6145,6160,6185,6190,6199,6203,6206,6209,6215,6220,6228,6229,6240,6241,6265,6311,6316,6318,6331,6332,6340,6343,6390,6404,6409,6423,6424,6429,6430,6431,6466,6474,6489,6514,6519,6528,6532,6535,6538,6544,6549,6557,6558,6569,6570,6594,6640,6645,6647,6660,6661,6669,6672,6719,6733,6738,6752,6753,6758,6759,6760,6795,6803,6818,6843,6848,6857,6861,6864,6867,6873,6878,6886,6887,6898,6899,6923,6969,6974,6976,6989,6990,6998,7001,7048,7062,7067,7081,7082,7087,7088,7089,7124,7132,7147,7172,7177,7186,7190,7193,7196,7202,7207,7215,7216,7227,7228,7252,7298,7303,7305,7318,7319,7327,7330,7377,7391,7396,7410,7411,7416,7417,7418,7453,7461,7476,7501,7506,7515,7519,7522,7525,7531,7536,7544,7545,7556,7557,7581,7627,7632,7634,7647,7648,7656,7659,7706,7720,7725,7739,7740,7745,7746,7747,7782,7790,7805,7830,7835,7844,7848,7851,7854,7860,7865,7873,7874,7885,7886,7910,7956,7961,7963,7976,7977,7985,7988,8035,8049,8054,8068,8069,8074,8075,8076,8111,8119,8134,8159,8164,8173,8177,8180,8183,8189,8194,8202,8203,8214,8215,8239,8285,8290,8292,8305,8306,8314,8317,8364,8378,8383,8397,8398,8403,8404,8405,8440,8448,8463,8488,8493,8502,8506,8509,8512,8518,8523,8531,8532,8543,8544,8568,8614,8619,8621,8634,8635,8643,8646,86
93,8707,8712,8726,8727,8732,8733,8734,8769,8777,8792,8817,8822,8831,8835,8838,8841,8847,8852,8860,8861,8872,8873,8897,8943,8948,8950,8963,8964,8972,8975,9022,9036,9041,9055,9056,9061,9062,9063,9098,9106,9121,9146,9151,9160,9164,9167,9170,9176,9181,9189,9190,9201,9202,9226,9272,9277,9279,9292,9293,9301,9304,9351,9365,9370,9384,9385,9390,9391,9392,9427,9435,9450,9475,9480,9489,9493,9496,9499,9505,9510,9518,9519,9530,9531,9555,9601,9606,9608,9621,9622,9630,9633,9680,9694,9699,9713,9714,9719,9720,9721,9756,9764,9779,9804,9809,9818,9822,9825,9828,9834,9839,9847,9848,9859,9860,9884,9930,9935,9937,9950,9951,9959,9962,10009,10023,10028,10042,10043,10048,10049,10050,10085,10093,10108,10133,10138,10147,10151,10154,10157,10163,10168,10176,10177,10188,10189,10213,10259,10264,10266,10279,10280,10288,10291,10338,10352,10357,10371,10372,10377,10378,10379,10414};
|
2
independent_test_harness/cycles_329_dets/4_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/4_cycles.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
const uint32_t n_cycles = 725;
|
||||||
|
uint32_t cycles[n_cycles] = {66,70,97,105,159,160,193,200,203,204,206,207,236,243,264,265,269,270,271,274,282,289,327,395,399,426,434,488,489,522,529,532,533,535,536,565,572,593,594,598,599,600,603,611,618,656,724,728,755,763,817,818,851,858,861,862,864,865,894,901,922,923,927,928,929,932,940,947,985,1053,1057,1084,1092,1146,1147,1180,1187,1190,1191,1193,1194,1223,1230,1251,1252,1256,1257,1258,1261,1269,1276,1314,1382,1386,1413,1421,1475,1476,1509,1516,1519,1520,1522,1523,1552,1559,1580,1581,1585,1586,1587,1590,1598,1605,1643,1711,1715,1742,1750,1804,1805,1838,1845,1848,1849,1851,1852,1881,1888,1909,1910,1914,1915,1916,1919,1927,1934,1972,2040,2044,2071,2079,2133,2134,2167,2174,2177,2178,2180,2181,2210,2217,2238,2239,2243,2244,2245,2248,2256,2263,2301,2369,2373,2400,2408,2462,2463,2496,2503,2506,2507,2509,2510,2539,2546,2567,2568,2572,2573,2574,2577,2585,2592,2630,2698,2702,2729,2737,2791,2792,2825,2832,2835,2836,2838,2839,2868,2875,2896,2897,2901,2902,2903,2906,2914,2921,2959,3027,3031,3058,3066,3120,3121,3154,3161,3164,3165,3167,3168,3197,3204,3225,3226,3230,3231,3232,3235,3243,3250,3288,3356,3360,3387,3395,3449,3450,3483,3490,3493,3494,3496,3497,3526,3533,3554,3555,3559,3560,3561,3564,3572,3579,3617,3685,3689,3716,3724,3778,3779,3812,3819,3822,3823,3825,3826,3855,3862,3883,3884,3888,3889,3890,3893,3901,3908,3946,4014,4018,4045,4053,4107,4108,4141,4148,4151,4152,4154,4155,4184,4191,4212,4213,4217,4218,4219,4222,4230,4237,4275,4343,4347,4374,4382,4436,4437,4470,4477,4480,4481,4483,4484,4513,4520,4541,4542,4546,4547,4548,4551,4559,4566,4604,4672,4676,4703,4711,4765,4766,4799,4806,4809,4810,4812,4813,4842,4849,4870,4871,4875,4876,4877,4880,4888,4895,4933,5001,5005,5032,5040,5094,5095,5128,5135,5138,5139,5141,5142,5171,5178,5199,5200,5204,5205,5206,5209,5217,5224,5262,5330,5334,5361,5369,5423,5424,5457,5464,5467,5468,5470,5471,5500,5507,5528,5529,5533,5534,5535,5538,5546,5553,5591,5659,5663,5690,5698,5752,5753,5786,5793,5796,5797,5799,5800,5829,5836,5857,5858,5862,586
3,5864,5867,5875,5882,5920,5988,5992,6019,6027,6081,6082,6115,6122,6125,6126,6128,6129,6158,6165,6186,6187,6191,6192,6193,6196,6204,6211,6249,6317,6321,6348,6356,6410,6411,6444,6451,6454,6455,6457,6458,6487,6494,6515,6516,6520,6521,6522,6525,6533,6540,6578,6646,6650,6677,6685,6739,6740,6773,6780,6783,6784,6786,6787,6816,6823,6844,6845,6849,6850,6851,6854,6862,6869,6907,6975,6979,7006,7014,7068,7069,7102,7109,7112,7113,7115,7116,7145,7152,7173,7174,7178,7179,7180,7183,7191,7198,7236,7304,7308,7335,7343,7397,7398,7431,7438,7441,7442,7444,7445,7474,7481,7502,7503,7507,7508,7509,7512,7520,7527,7565,7633,7637,7664,7672,7726,7727,7760,7767,7770,7771,7773,7774,7803,7810,7831,7832,7836,7837,7838,7841,7849,7856,7894,7962,7966,7993,8001,8055,8056,8089,8096,8099,8100,8102,8103,8132,8139,8160,8161,8165,8166,8167,8170,8178,8185,8223,8291,8295,8322,8330,8384,8385,8418,8425,8428,8429,8431,8432,8461,8468,8489,8490,8494,8495,8496,8499,8507,8514,8552,8620,8624,8651,8659,8713,8714,8747,8754,8757,8758,8760,8761,8790,8797,8818,8819,8823,8824,8825,8828,8836,8843,8881,8949,8953,8980,8988,9042,9043,9076,9083,9086,9087,9089,9090,9119,9126,9147,9148,9152,9153,9154,9157,9165,9172,9210,9278,9282,9309,9317,9371,9372,9405,9412,9415,9416,9418,9419,9448,9455,9476,9477,9481,9482,9483,9486,9494,9501,9539,9607,9611,9638,9646,9700,9701,9734,9741,9744,9745,9747,9748,9777,9784,9805,9806,9810,9811,9812,9815,9823,9830,9868,9936,9940,9967,9975,10029,10030,10063,10070,10073,10074,10076,10077,10106,10113,10134,10135,10139,10140,10141,10144,10152,10159,10197,10265,10269,10296,10304,10358,10359,10392,10399,10402,10403,10405,10406};
|
2
independent_test_harness/cycles_329_dets/5_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/5_cycles.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
const uint32_t n_cycles = 884;
|
||||||
|
uint32_t cycles[n_cycles] = {95,122,132,134,136,152,154,155,164,166,167,169,190,192,194,202,219,220,234,237,244,251,266,276,290,291,292,310,424,451,461,463,465,481,483,484,493,495,496,498,519,521,523,531,548,549,563,566,573,580,595,605,619,620,621,639,753,780,790,792,794,810,812,813,822,824,825,827,848,850,852,860,877,878,892,895,902,909,924,934,948,949,950,968,1082,1109,1119,1121,1123,1139,1141,1142,1151,1153,1154,1156,1177,1179,1181,1189,1206,1207,1221,1224,1231,1238,1253,1263,1277,1278,1279,1297,1411,1438,1448,1450,1452,1468,1470,1471,1480,1482,1483,1485,1506,1508,1510,1518,1535,1536,1550,1553,1560,1567,1582,1592,1606,1607,1608,1626,1740,1767,1777,1779,1781,1797,1799,1800,1809,1811,1812,1814,1835,1837,1839,1847,1864,1865,1879,1882,1889,1896,1911,1921,1935,1936,1937,1955,2069,2096,2106,2108,2110,2126,2128,2129,2138,2140,2141,2143,2164,2166,2168,2176,2193,2194,2208,2211,2218,2225,2240,2250,2264,2265,2266,2284,2398,2425,2435,2437,2439,2455,2457,2458,2467,2469,2470,2472,2493,2495,2497,2505,2522,2523,2537,2540,2547,2554,2569,2579,2593,2594,2595,2613,2727,2754,2764,2766,2768,2784,2786,2787,2796,2798,2799,2801,2822,2824,2826,2834,2851,2852,2866,2869,2876,2883,2898,2908,2922,2923,2924,2942,3056,3083,3093,3095,3097,3113,3115,3116,3125,3127,3128,3130,3151,3153,3155,3163,3180,3181,3195,3198,3205,3212,3227,3237,3251,3252,3253,3271,3385,3412,3422,3424,3426,3442,3444,3445,3454,3456,3457,3459,3480,3482,3484,3492,3509,3510,3524,3527,3534,3541,3556,3566,3580,3581,3582,3600,3714,3741,3751,3753,3755,3771,3773,3774,3783,3785,3786,3788,3809,3811,3813,3821,3838,3839,3853,3856,3863,3870,3885,3895,3909,3910,3911,3929,4043,4070,4080,4082,4084,4100,4102,4103,4112,4114,4115,4117,4138,4140,4142,4150,4167,4168,4182,4185,4192,4199,4214,4224,4238,4239,4240,4258,4372,4399,4409,4411,4413,4429,4431,4432,4441,4443,4444,4446,4467,4469,4471,4479,4496,4497,4511,4514,4521,4528,4543,4553,4567,4568,4569,4587,4701,4728,4738,4740,4742,4758,4760,4761,4770,4772,4773,4775,4796,4798,4800,4808,4825,4826,4840,4
843,4850,4857,4872,4882,4896,4897,4898,4916,5030,5057,5067,5069,5071,5087,5089,5090,5099,5101,5102,5104,5125,5127,5129,5137,5154,5155,5169,5172,5179,5186,5201,5211,5225,5226,5227,5245,5359,5386,5396,5398,5400,5416,5418,5419,5428,5430,5431,5433,5454,5456,5458,5466,5483,5484,5498,5501,5508,5515,5530,5540,5554,5555,5556,5574,5688,5715,5725,5727,5729,5745,5747,5748,5757,5759,5760,5762,5783,5785,5787,5795,5812,5813,5827,5830,5837,5844,5859,5869,5883,5884,5885,5903,6017,6044,6054,6056,6058,6074,6076,6077,6086,6088,6089,6091,6112,6114,6116,6124,6141,6142,6156,6159,6166,6173,6188,6198,6212,6213,6214,6232,6346,6373,6383,6385,6387,6403,6405,6406,6415,6417,6418,6420,6441,6443,6445,6453,6470,6471,6485,6488,6495,6502,6517,6527,6541,6542,6543,6561,6675,6702,6712,6714,6716,6732,6734,6735,6744,6746,6747,6749,6770,6772,6774,6782,6799,6800,6814,6817,6824,6831,6846,6856,6870,6871,6872,6890,7004,7031,7041,7043,7045,7061,7063,7064,7073,7075,7076,7078,7099,7101,7103,7111,7128,7129,7143,7146,7153,7160,7175,7185,7199,7200,7201,7219,7333,7360,7370,7372,7374,7390,7392,7393,7402,7404,7405,7407,7428,7430,7432,7440,7457,7458,7472,7475,7482,7489,7504,7514,7528,7529,7530,7548,7662,7689,7699,7701,7703,7719,7721,7722,7731,7733,7734,7736,7757,7759,7761,7769,7786,7787,7801,7804,7811,7818,7833,7843,7857,7858,7859,7877,7991,8018,8028,8030,8032,8048,8050,8051,8060,8062,8063,8065,8086,8088,8090,8098,8115,8116,8130,8133,8140,8147,8162,8172,8186,8187,8188,8206,8320,8347,8357,8359,8361,8377,8379,8380,8389,8391,8392,8394,8415,8417,8419,8427,8444,8445,8459,8462,8469,8476,8491,8501,8515,8516,8517,8535,8649,8676,8686,8688,8690,8706,8708,8709,8718,8720,8721,8723,8744,8746,8748,8756,8773,8774,8788,8791,8798,8805,8820,8830,8844,8845,8846,8864,8978,9005,9015,9017,9019,9035,9037,9038,9047,9049,9050,9052,9073,9075,9077,9085,9102,9103,9117,9120,9127,9134,9149,9159,9173,9174,9175,9193,9307,9334,9344,9346,9348,9364,9366,9367,9376,9378,9379,9381,9402,9404,9406,9414,9431,9432,9446,9449,9456,9463,9478,9488,9502,9503,9504,9
522,9636,9663,9673,9675,9677,9693,9695,9696,9705,9707,9708,9710,9731,9733,9735,9743,9760,9761,9775,9778,9785,9792,9807,9817,9831,9832,9833,9851,9965,9992,10002,10004,10006,10022,10024,10025,10034,10036,10037,10039,10060,10062,10064,10072,10089,10090,10104,10107,10114,10121,10136,10146,10160,10161,10162,10180,10294,10321,10331,10333,10335,10351,10353,10354,10363,10365,10366,10368,10389,10391,10393,10401};
|
2
independent_test_harness/cycles_329_dets/9_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/9_cycles.h
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
const uint32_t n_cycles = 346;
|
||||||
|
uint32_t cycles[n_cycles] = {62,102,131,137,216,217,233,235,322,323,324,391,431,460,466,545,546,562,564,651,652,653,720,760,789,795,874,875,891,893,980,981,982,1049,1089,1118,1124,1203,1204,1220,1222,1309,1310,1311,1378,1418,1447,1453,1532,1533,1549,1551,1638,1639,1640,1707,1747,1776,1782,1861,1862,1878,1880,1967,1968,1969,2036,2076,2105,2111,2190,2191,2207,2209,2296,2297,2298,2365,2405,2434,2440,2519,2520,2536,2538,2625,2626,2627,2694,2734,2763,2769,2848,2849,2865,2867,2954,2955,2956,3023,3063,3092,3098,3177,3178,3194,3196,3283,3284,3285,3352,3392,3421,3427,3506,3507,3523,3525,3612,3613,3614,3681,3721,3750,3756,3835,3836,3852,3854,3941,3942,3943,4010,4050,4079,4085,4164,4165,4181,4183,4270,4271,4272,4339,4379,4408,4414,4493,4494,4510,4512,4599,4600,4601,4668,4708,4737,4743,4822,4823,4839,4841,4928,4929,4930,4997,5037,5066,5072,5151,5152,5168,5170,5257,5258,5259,5326,5366,5395,5401,5480,5481,5497,5499,5586,5587,5588,5655,5695,5724,5730,5809,5810,5826,5828,5915,5916,5917,5984,6024,6053,6059,6138,6139,6155,6157,6244,6245,6246,6313,6353,6382,6388,6467,6468,6484,6486,6573,6574,6575,6642,6682,6711,6717,6796,6797,6813,6815,6902,6903,6904,6971,7011,7040,7046,7125,7126,7142,7144,7231,7232,7233,7300,7340,7369,7375,7454,7455,7471,7473,7560,7561,7562,7629,7669,7698,7704,7783,7784,7800,7802,7889,7890,7891,7958,7998,8027,8033,8112,8113,8129,8131,8218,8219,8220,8287,8327,8356,8362,8441,8442,8458,8460,8547,8548,8549,8616,8656,8685,8691,8770,8771,8787,8789,8876,8877,8878,8945,8985,9014,9020,9099,9100,9116,9118,9205,9206,9207,9274,9314,9343,9349,9428,9429,9445,9447,9534,9535,9536,9603,9643,9672,9678,9757,9758,9774,9776,9863,9864,9865,9932,9972,10001,10007,10086,10087,10103,10105,10192,10193,10194,10261,10301,10330,10336,10415};
|
2
independent_test_harness/cycles_329_dets/all_cycles.h
Normal file
2
independent_test_harness/cycles_329_dets/all_cycles.h
Normal file
File diff suppressed because one or more lines are too long
0
independent_test_harness/cycles_329_dets/tmp
Normal file
0
independent_test_harness/cycles_329_dets/tmp
Normal file
@ -0,0 +1 @@
|
|||||||
|
Empty placeholder. Please download real dataset from: trex@trex-share.univ-tlse3.fr:uvsq/datasets/dataset_15784d_zeropadded_cm.hdf5
|
1
independent_test_harness/dataset_329d_zeropadded_cm.hdf5
Normal file
1
independent_test_harness/dataset_329d_zeropadded_cm.hdf5
Normal file
@ -0,0 +1 @@
|
|||||||
|
Empty placeholder. Please download real dataset from: trex@trex-share.univ-tlse3.fr:uvsq/datasets/dataset_329d_zeropadded_cm.hdf5
|
108
independent_test_harness/detupdate21.f90
Normal file
108
independent_test_harness/detupdate21.f90
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
subroutine detupd(vDim, vLDS, Updates, Updates_index, &
                  Slater_inv, determinant) bind(C)
  ! C-callable entry point for det_update21.
  !
  ! vDim and vLDS arrive by value (C calling convention); they are copied
  ! into local variables so they can be handed on by reference to
  ! det_update21, which is called through an implicit (external) interface.
  ! Only the first element of Updates_index is used.
  !
  ! Arguments:
  !   vDim          second extent of Slater_inv
  !   vLDS          leading dimension of Slater_inv and length of Updates
  !   Updates       update vector, forwarded unchanged
  !   Updates_index one-element array; its value is forwarded as the
  !                 column index l of det_update21
  !   Slater_inv    matrix updated in place by det_update21
  !   determinant   scalar updated in place by det_update21
  use iso_c_binding
  implicit none

  external :: det_update21

  integer(c_int64_t), intent(in), value :: vLDS, vDim
  real(c_double),     intent(in)        :: Updates(vLDS)
  integer(c_int64_t), intent(in)        :: Updates_index(1)
  real(c_double),     intent(inout)     :: Slater_inv(vLDS,vDim)
  real(c_double),     intent(inout)     :: determinant

  integer(c_int64_t) :: col_index, dim_local, lds_local

  col_index = Updates_index(1)
  dim_local = vDim
  lds_local = vLDS

  call det_update21(dim_local, lds_local, Updates, col_index, &
                    Slater_inv, determinant)
end subroutine detupd
|
||||||
|
|
||||||
|
subroutine det_update21(n, LDS, u, l, S_inv, d)
  ! In-place update of the leading 21x21 block of S_inv and of the scalar d,
  ! hard-wired for dimension 21 (20 elements unrolled by four plus one
  ! remainder element). The original inline annotations trace this code to
  ! det.irp.f_template_577, lines 427-528.
  !
  ! With d_old the value of d on entry, the routine computes
  !   zl     = sum_{i=1..21} S_inv(i,l)*u(i)
  !   d      = d_old + zl
  !   lambda = d * (1/d_old)
  !   z(j)   = sum_{i=1..21} S_inv(i,j)*u(i)          for j = 1..21
  !   w(i)   = S_inv(i,l) * (1/d_old)                 for i = 1..21
  !   S_inv(i,j) = S_inv(i,j)*lambda - w(i)*z(j)      for i,j = 1..21
  !
  ! If |lambda| < 1.d-3 the routine returns early: at that point d already
  ! holds the new value, while S_inv has not been modified.
  !
  ! The compiler directives below tell the Intel compiler that u and S_inv
  ! are 32-byte aligned and that LDS is a multiple of 4 and at least 21;
  ! callers must honour these assumptions.
  use iso_c_binding
  implicit none

  integer(c_int64_t), intent(in)    :: n, LDS
  integer(c_int64_t), intent(in)    :: l
  real(c_double),     intent(in)    :: u(n)
  real(c_double),     intent(inout) :: S_inv(LDS,n)
  real(c_double),     intent(inout) :: d

  real(c_double)     :: z(n), w(n), lambda, d_inv
  real(c_double)     :: acc0, acc1, acc2, acc3
  integer(c_int64_t) :: row, col

  !DIR$ ATTRIBUTES ALIGN : 32 :: z, w
  !DIR$ ASSUME_ALIGNED u : 32
  !DIR$ ASSUME_ALIGNED S_inv : 32
  !DIR$ ASSUME (mod(LDS,32/8) == 0)
  !DIR$ ASSUME (LDS >= 21)

  ! Dot product of column l of S_inv with the update vector.
  acc0 = 0.d0
  !DIR$ NOPREFETCH
  do row = 1, 20, 4
    acc0 = acc0 + S_inv(row,l)*u(row) + S_inv(row+1,l)*u(row+1) &
                + S_inv(row+2,l)*u(row+2) + S_inv(row+3,l)*u(row+3)
  end do
  acc0 = acc0 + S_inv(21,l)*u(21)

  ! Reciprocal of the old d, new d, and their ratio.
  d_inv  = 1.d0/d
  d      = d + acc0
  lambda = d*d_inv

  ! Ratio too small: give up without touching S_inv (d keeps its new value).
  if (abs(lambda) < 1.d-3) then
    return
  end if

  ! z(col) = dot(S_inv(:,col), u) for columns 1..20, four columns at a time.
  !DIR$ VECTOR ALIGNED
  do col = 1, 20, 4
    acc0 = 0.d0
    acc1 = 0.d0
    acc2 = 0.d0
    acc3 = 0.d0
    !DIR$ VECTOR ALIGNED
    !DIR$ NOPREFETCH
    do row = 1, 20
      acc0 = acc0 + S_inv(row,col  )*u(row)
      acc1 = acc1 + S_inv(row,col+1)*u(row)
      acc2 = acc2 + S_inv(row,col+2)*u(row)
      acc3 = acc3 + S_inv(row,col+3)*u(row)
    end do
    z(col  ) = acc0 + S_inv(21,col  )*u(21)
    z(col+1) = acc1 + S_inv(21,col+1)*u(21)
    z(col+2) = acc2 + S_inv(21,col+2)*u(21)
    z(col+3) = acc3 + S_inv(21,col+3)*u(21)
  end do

  ! Remainder column 21.
  acc0 = 0.d0
  !DIR$ VECTOR ALIGNED
  !DIR$ NOPREFETCH
  do row = 1, 20
    acc0 = acc0 + S_inv(row,21)*u(row)
  end do
  z(21) = acc0 + S_inv(21,21)*u(21)

  ! Scaled copy of column l, taken before S_inv is overwritten below.
  !DIR$ NOPREFETCH
  do row = 1, 21
    w(row) = S_inv(row,l)*d_inv
  end do

  ! Apply the update to columns 1..20, four columns at a time.
  do col = 1, 20, 4
    acc0 = z(col  )
    acc1 = z(col+1)
    acc2 = z(col+2)
    acc3 = z(col+3)
    !DIR$ VECTOR ALIGNED
    !DIR$ NOPREFETCH
    do row = 1, 20
      S_inv(row,col  ) = S_inv(row,col  )*lambda - w(row)*acc0
      S_inv(row,col+1) = S_inv(row,col+1)*lambda - w(row)*acc1
      S_inv(row,col+2) = S_inv(row,col+2)*lambda - w(row)*acc2
      S_inv(row,col+3) = S_inv(row,col+3)*lambda - w(row)*acc3
    end do
    S_inv(21,col  ) = S_inv(21,col  )*lambda - w(21)*acc0
    S_inv(21,col+1) = S_inv(21,col+1)*lambda - w(21)*acc1
    S_inv(21,col+2) = S_inv(21,col+2)*lambda - w(21)*acc2
    S_inv(21,col+3) = S_inv(21,col+3)*lambda - w(21)*acc3
  end do

  ! Remainder column 21.
  acc0 = z(21)
  !DIR$ VECTOR ALIGNED
  !DIR$ NOPREFETCH
  do row = 1, 21
    S_inv(row,21) = S_inv(row,21)*lambda - w(row)*acc0
  end do
end subroutine det_update21
|
28
independent_test_harness/gen_list_update_cycles.sh
Executable file
28
independent_test_harness/gen_list_update_cycles.sh
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
#!/bin/bash
#
# Generate the C header <NU>_cycles.h listing every cycle of an HDF5 dataset
# whose 'nupdates' entry equals NU.
#
# Usage: gen_list_update_cycles.sh <input.hdf5> <number_of_updates>
#
# The header defines 'n_cycles' (number of selected cycles) and the array
# 'cycles' holding the selected cycle numbers in ascending order.

if [[ $# -lt 2 ]]
then
    echo "Usage: $0 <input.hdf5> <number_of_updates>" >&2
    exit 1
fi

IN=$1  # Input dataset (hdf5)
NU=$2  # Number of updates

# NU is used both in an arithmetic comparison and in the output file name,
# so refuse anything that is not a plain non-negative integer.
if ! [[ $NU =~ ^[0-9]+$ ]]
then
    echo "$0: number of updates must be a non-negative integer, got '$NU'" >&2
    exit 1
fi

# Top-level groups are named cycle_<n>; keep only <n>, sorted numerically.
CYCLE_LIST=$(h5ls "${IN}" | awk '{print$1}' | sed 's/cycle_//g' | sort -n)
SELECTION=()

# Filter CYCLE_LIST and add to SELECTION
for CYCLE in ${CYCLE_LIST}
do
    # The update count is taken from the second field of the third line
    # printed by 'h5ls -d'.
    NUPDS=$(h5ls -d "${IN}/cycle_${CYCLE}/nupdates" | awk 'FNR == 3 {print $2}')

    # Skip cycles whose count could not be read; an empty or malformed value
    # would otherwise evaluate as 0 (or raise a syntax error) inside (( )).
    [[ $NUPDS =~ ^[0-9]+$ ]] || continue

    # Force base 10 so that a leading zero is never read as octal.
    if (( 10#$NUPDS == 10#$NU ))
    then
        SELECTION+=("$CYCLE")
    fi
done

# Generate C-header file.
# Join the selection with commas directly rather than appending a trailing
# ',' and truncating it afterwards: with an empty selection the truncation
# removed the opening '{' and left an unbalanced initializer.
OUT="${NU}_cycles.h"
CYCLES_CSV=$(IFS=,; echo "${SELECTION[*]}")
{
    echo "const uint32_t n_cycles = ${#SELECTION[@]};"
    echo "uint32_t cycles[n_cycles] = {${CYCLES_CSV}};"
} > "${OUT}"
|
806
independent_test_harness/get_stats.m
Executable file
806
independent_test_harness/get_stats.m
Executable file
@ -0,0 +1,806 @@
|
|||||||
|
#! /bin/octave -qf
|
||||||
|
|
||||||
|
data_anthony=load('ANTHONY.dat');
|
||||||
|
data_naive=load('NAIVE.dat');
|
||||||
|
data_later=load('LATER.dat');
|
||||||
|
data_split=load('SPLITTING.dat');
|
||||||
|
data_blocked=load('BLOCKED.dat');
|
||||||
|
data_lapack=load('MKL_LAPACK.dat');
|
||||||
|
data_wb2=load('WB2.dat');
|
||||||
|
data_wb3=load('WB3.dat');
|
||||||
|
|
||||||
|
% Partition the ANTHONY results by outcome and by number of updates.
% Rows whose column 5 equals zero are the cycles that passed, all other rows
% are the cycles that failed; column 2 is matched against the update count.
% Files written (space delimited): anthony_pass_all.dat, anthony_fail_all.dat
% and anthony_pass_<k>.dat / anthony_fail_<k>.dat for k = 1..15.
anthony_pass_all=data_anthony(data_anthony(:,5)==0,:); % cycles that passed
dlmwrite('anthony_pass_all.dat',anthony_pass_all, ' ')

anthony_fail_all=data_anthony(data_anthony(:,5)!=0,:); % cycles that failed
dlmwrite('anthony_fail_all.dat',anthony_fail_all, ' ')

% One loop replaces the 15 copy-pasted stanzas; files are written in the same
% order (pass_k, then fail_k) and with the same names as before.
% NOTE(review): the per-count matrices (anthony_pass_1, ...) are no longer
% kept as named workspace variables -- confirm nothing later in the script
% reads them.
for nupd=1:15
  pass_k=anthony_pass_all(anthony_pass_all(:,2)==nupd,:); % passed, nupd upd
  dlmwrite(sprintf('anthony_pass_%d.dat',nupd),pass_k, ' ')
  fail_k=anthony_fail_all(anthony_fail_all(:,2)==nupd,:); % failed, nupd upd
  dlmwrite(sprintf('anthony_fail_%d.dat',nupd),fail_k, ' ')
end
|
||||||
|
|
||||||
|
|
||||||
|
% Partition the NAIVE results by outcome and by number of updates.
% Rows whose column 5 equals zero are the cycles that passed, all other rows
% are the cycles that failed; column 2 is matched against the update count.
% Files written (space delimited): naive_pass_all.dat, naive_fail_all.dat
% and naive_pass_<k>.dat / naive_fail_<k>.dat for k = 1..15.
naive_pass_all=data_naive(data_naive(:,5)==0,:); % cycles that passed
dlmwrite('naive_pass_all.dat',naive_pass_all, ' ')

naive_fail_all=data_naive(data_naive(:,5)!=0,:); % cycles that failed
dlmwrite('naive_fail_all.dat',naive_fail_all, ' ')

% One loop replaces the 15 copy-pasted stanzas; files are written in the same
% order (pass_k, then fail_k) and with the same names as before.
% NOTE(review): the per-count matrices (naive_pass_1, ...) are no longer
% kept as named workspace variables -- confirm nothing later in the script
% reads them.
for nupd=1:15
  pass_k=naive_pass_all(naive_pass_all(:,2)==nupd,:); % passed, nupd upd
  dlmwrite(sprintf('naive_pass_%d.dat',nupd),pass_k, ' ')
  fail_k=naive_fail_all(naive_fail_all(:,2)==nupd,:); % failed, nupd upd
  dlmwrite(sprintf('naive_fail_%d.dat',nupd),fail_k, ' ')
end
|
||||||
|
|
||||||
|
% Partition the LATER results by outcome and by number of updates.
% Rows whose column 5 equals zero are the cycles that passed, all other rows
% are the cycles that failed; column 2 is matched against the update count.
% Files written (space delimited): later_pass_all.dat, later_fail_all.dat
% and later_pass_<k>.dat / later_fail_<k>.dat for k = 1..15.
later_pass_all=data_later(data_later(:,5)==0,:); % cycles that passed
dlmwrite('later_pass_all.dat',later_pass_all, ' ')

later_fail_all=data_later(data_later(:,5)!=0,:); % cycles that failed
dlmwrite('later_fail_all.dat',later_fail_all, ' ')

% One loop replaces the 15 copy-pasted stanzas; files are written in the same
% order (pass_k, then fail_k) and with the same names as before.
% NOTE(review): the per-count matrices (later_pass_1, ...) are no longer
% kept as named workspace variables -- confirm nothing later in the script
% reads them.
for nupd=1:15
  pass_k=later_pass_all(later_pass_all(:,2)==nupd,:); % passed, nupd upd
  dlmwrite(sprintf('later_pass_%d.dat',nupd),pass_k, ' ')
  fail_k=later_fail_all(later_fail_all(:,2)==nupd,:); % failed, nupd upd
  dlmwrite(sprintf('later_fail_%d.dat',nupd),fail_k, ' ')
end
|
||||||
|
|
||||||
|
|
||||||
|
% Partition the SPLITTING results by outcome and by number of updates.
% Rows whose column 5 equals zero are the cycles that passed, all other rows
% are the cycles that failed; column 2 is matched against the update count.
% Files written (space delimited): split_pass_all.dat, split_fail_all.dat
% and split_pass_<k>.dat / split_fail_<k>.dat for k = 1..15.
split_pass_all=data_split(data_split(:,5)==0,:); % cycles that passed
dlmwrite('split_pass_all.dat',split_pass_all, ' ')

split_fail_all=data_split(data_split(:,5)!=0,:); % cycles that failed
dlmwrite('split_fail_all.dat',split_fail_all, ' ')

% One loop replaces the 15 copy-pasted stanzas; files are written in the same
% order (pass_k, then fail_k) and with the same names as before.
% NOTE(review): the per-count matrices (split_pass_1, ...) are no longer
% kept as named workspace variables -- confirm nothing later in the script
% reads them.
for nupd=1:15
  pass_k=split_pass_all(split_pass_all(:,2)==nupd,:); % passed, nupd upd
  dlmwrite(sprintf('split_pass_%d.dat',nupd),pass_k, ' ')
  fail_k=split_fail_all(split_fail_all(:,2)==nupd,:); % failed, nupd upd
  dlmwrite(sprintf('split_fail_%d.dat',nupd),fail_k, ' ')
end
|
||||||
|
|
||||||
|
|
||||||
|
indcs=(data_blocked(:,5)==0); % select cycles that passed
|
||||||
|
blocked_pass_all=data_blocked(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_all.dat',blocked_pass_all, ' ')
|
||||||
|
|
||||||
|
indcs=(data_blocked(:,5)!=0); % select cycles that failed
|
||||||
|
blocked_fail_all=data_blocked(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_all.dat',blocked_fail_all, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==1); % select cycles that passed containing 1 upd
|
||||||
|
blocked_pass_1=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_1.dat',blocked_pass_1, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==1); % select cycles that failed containing 1 upd
|
||||||
|
blocked_fail_1=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_1.dat',blocked_fail_1, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==2); % select cycles that passed containing 2 upd
|
||||||
|
blocked_pass_2=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_2.dat',blocked_pass_2, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==2); % select cycles that failed containing 2 upd
|
||||||
|
blocked_fail_2=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_2.dat',blocked_fail_2, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==3); % select cycles that passed containing 3 upd
|
||||||
|
blocked_pass_3=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_3.dat',blocked_pass_3, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==3); % select cycles that failed containing 3 upd
|
||||||
|
blocked_fail_3=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_3.dat',blocked_fail_3, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==4); % select cycles that passed containing 4 upd
|
||||||
|
blocked_pass_4=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_4.dat',blocked_pass_4, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==4); % select cycles that failed containing 4 upd
|
||||||
|
blocked_fail_4=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_4.dat',blocked_fail_4, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==5); % select cycles that passed containing 5 upd
|
||||||
|
blocked_pass_5=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_5.dat',blocked_pass_5, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==5); % select cycles that failed containing 5 upd
|
||||||
|
blocked_fail_5=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_5.dat',blocked_fail_5, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==6); % select cycles that passed containing 6 upd
|
||||||
|
blocked_pass_6=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_6.dat',blocked_pass_6, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==6); % select cycles that failed containing 6 upd
|
||||||
|
blocked_fail_6=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_6.dat',blocked_fail_6, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==7); % select cycles that passed containing 7 upd
|
||||||
|
blocked_pass_7=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_7.dat',blocked_pass_7, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==7); % select cycles that failed containing 7 upd
|
||||||
|
blocked_fail_7=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_7.dat',blocked_fail_7, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==8); % select cycles that passed containing 8 upd
|
||||||
|
blocked_pass_8=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_8.dat',blocked_pass_8, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==8); % select cycles that failed containing 8 upd
|
||||||
|
blocked_fail_8=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_8.dat',blocked_fail_8, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==9); % select cycles that passed containing 9 upd
|
||||||
|
blocked_pass_9=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_9.dat',blocked_pass_9, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==9); % select cycles that failed containing 9 upd
|
||||||
|
blocked_fail_9=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_9.dat',blocked_fail_9, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==10); % select cycles that passed containing 10 upd
|
||||||
|
blocked_pass_10=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_10.dat',blocked_pass_10, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==10); % select cycles that failed containing 10 upd
|
||||||
|
blocked_fail_10=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_10.dat',blocked_fail_10, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==11); % select cycles that passed containing 11 upd
|
||||||
|
blocked_pass_11=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_11.dat',blocked_pass_11, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==11); % select cycles that failed containing 11 upd
|
||||||
|
blocked_fail_11=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_11.dat',blocked_fail_11, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==12); % select cycles that passed containing 12 upd
|
||||||
|
blocked_pass_12=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_12.dat',blocked_pass_12, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==12); % select cycles that failed containing 12 upd
|
||||||
|
blocked_fail_12=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_12.dat',blocked_fail_12, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==13); % select cycles that passed containing 13 upd
|
||||||
|
blocked_pass_13=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_13.dat',blocked_pass_13, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==13); % select cycles that failed containing 13 upd
|
||||||
|
blocked_fail_13=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_13.dat',blocked_fail_13, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==14); % select cycles that passed containing 14 upd
|
||||||
|
blocked_pass_14=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_14.dat',blocked_pass_14, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==14); % select cycles that failed containing 14 upd
|
||||||
|
blocked_fail_14=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_14.dat',blocked_fail_14, ' ')
|
||||||
|
|
||||||
|
indcs=(blocked_pass_all(:,2)==15); % select cycles that passed containing 15 upd
|
||||||
|
blocked_pass_15=blocked_pass_all(indcs,:);
|
||||||
|
dlmwrite('blocked_pass_15.dat',blocked_pass_15, ' ')
|
||||||
|
indcs=(blocked_fail_all(:,2)==15); % select cycles that failed containing 15 upd
|
||||||
|
blocked_fail_15=blocked_fail_all(indcs,:);
|
||||||
|
dlmwrite('blocked_fail_15.dat',blocked_fail_15, ' ')
|
||||||
|
|
||||||
|
|
||||||
|
indcs=(data_lapack(:,5)==0); % select cycles that passed
|
||||||
|
lapack_pass_all=data_lapack(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_all.dat',lapack_pass_all, ' ')
|
||||||
|
|
||||||
|
indcs=(data_lapack(:,5)!=0); % select cycles that failed
|
||||||
|
lapack_fail_all=data_lapack(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_all.dat',lapack_fail_all, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==1); % select cycles that passed containing 1 upd
|
||||||
|
lapack_pass_1=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_1.dat',lapack_pass_1, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==1); % select cycles that failed containing 1 upd
|
||||||
|
lapack_fail_1=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_1.dat',lapack_fail_1, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==2); % select cycles that passed containing 2 upd
|
||||||
|
lapack_pass_2=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_2.dat',lapack_pass_2, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==2); % select cycles that failed containing 2 upd
|
||||||
|
lapack_fail_2=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_2.dat',lapack_fail_2, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==3); % select cycles that passed containing 3 upd
|
||||||
|
lapack_pass_3=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_3.dat',lapack_pass_3, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==3); % select cycles that failed containing 3 upd
|
||||||
|
lapack_fail_3=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_3.dat',lapack_fail_3, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==4); % select cycles that passed containing 4 upd
|
||||||
|
lapack_pass_4=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_4.dat',lapack_pass_4, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==4); % select cycles that failed containing 4 upd
|
||||||
|
lapack_fail_4=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_4.dat',lapack_fail_4, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==5); % select cycles that passed containing 5 upd
|
||||||
|
lapack_pass_5=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_5.dat',lapack_pass_5, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==5); % select cycles that failed containing 5 upd
|
||||||
|
lapack_fail_5=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_5.dat',lapack_fail_5, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==6); % select cycles that passed containing 6 upd
|
||||||
|
lapack_pass_6=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_6.dat',lapack_pass_6, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==6); % select cycles that failed containing 6 upd
|
||||||
|
lapack_fail_6=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_6.dat',lapack_fail_6, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==7); % select cycles that passed containing 7 upd
|
||||||
|
lapack_pass_7=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_7.dat',lapack_pass_7, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==7); % select cycles that failed containing 7 upd
|
||||||
|
lapack_fail_7=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_7.dat',lapack_fail_7, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==8); % select cycles that passed containing 8 upd
|
||||||
|
lapack_pass_8=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_8.dat',lapack_pass_8, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==8); % select cycles that failed containing 8 upd
|
||||||
|
lapack_fail_8=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_8.dat',lapack_fail_8, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==9); % select cycles that passed containing 9 upd
|
||||||
|
lapack_pass_9=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_9.dat',lapack_pass_9, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==9); % select cycles that failed containing 9 upd
|
||||||
|
lapack_fail_9=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_9.dat',lapack_fail_9, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==10); % select cycles that passed containing 10 upd
|
||||||
|
lapack_pass_10=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_10.dat',lapack_pass_10, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==10); % select cycles that failed containing 10 upd
|
||||||
|
lapack_fail_10=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_10.dat',lapack_fail_10, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==11); % select cycles that passed containing 11 upd
|
||||||
|
lapack_pass_11=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_11.dat',lapack_pass_11, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==11); % select cycles that failed containing 11 upd
|
||||||
|
lapack_fail_11=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_11.dat',lapack_fail_11, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==12); % select cycles that passed containing 12 upd
|
||||||
|
lapack_pass_12=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_12.dat',lapack_pass_12, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==12); % select cycles that failed containing 12 upd
|
||||||
|
lapack_fail_12=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_12.dat',lapack_fail_12, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==13); % select cycles that passed containing 13 upd
|
||||||
|
lapack_pass_13=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_13.dat',lapack_pass_13, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==13); % select cycles that failed containing 13 upd
|
||||||
|
lapack_fail_13=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_13.dat',lapack_fail_13, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==14); % select cycles that passed containing 14 upd
|
||||||
|
lapack_pass_14=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_14.dat',lapack_pass_14, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==14); % select cycles that failed containing 14 upd
|
||||||
|
lapack_fail_14=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_14.dat',lapack_fail_14, ' ')
|
||||||
|
|
||||||
|
indcs=(lapack_pass_all(:,2)==15); % select cycles that passed containing 15 upd
|
||||||
|
lapack_pass_15=lapack_pass_all(indcs,:);
|
||||||
|
dlmwrite('lapack_pass_15.dat',lapack_pass_15, ' ')
|
||||||
|
indcs=(lapack_fail_all(:,2)==15); % select cycles that failed containing 15 upd
|
||||||
|
lapack_fail_15=lapack_fail_all(indcs,:);
|
||||||
|
dlmwrite('lapack_fail_15.dat',lapack_fail_15, ' ')
|
||||||
|
|
||||||
|
|
||||||
|
indcs=(data_wb2(:,5)==0); % select cycles that passed
|
||||||
|
wb2_pass_all=data_wb2(indcs,:);
|
||||||
|
dlmwrite('wb2_pass_all.dat',wb2_pass_all, ' ')
|
||||||
|
|
||||||
|
indcs=(data_wb2(:,5)!=0); % select cycles that failed
|
||||||
|
wb2_fail_all=data_wb2(indcs,:);
|
||||||
|
dlmwrite('wb2_fail_all.dat',wb2_fail_all, ' ')
|
||||||
|
|
||||||
|
|
||||||
|
indcs=(data_wb3(:,5)==0); % select cycles that passed
|
||||||
|
wb3_pass_all=data_wb3(indcs,:);
|
||||||
|
dlmwrite('wb3_pass_all.dat',wb3_pass_all, ' ')
|
||||||
|
|
||||||
|
indcs=(data_wb3(:,5)!=0); % select cycles that failed
|
||||||
|
wb3_fail_all=data_wb3(indcs,:);
|
||||||
|
dlmwrite('wb3_fail_all.dat',wb3_fail_all, ' ')
|
||||||
|
|
||||||
|
|
||||||
|
n_all_cycles=size(data_anthony)(1);
|
||||||
|
n_1_cycles=size(anthony_pass_1)(1)+size(anthony_fail_1)(1);
|
||||||
|
n_2_cycles=size(anthony_pass_2)(1)+size(anthony_fail_2)(1);
|
||||||
|
n_3_cycles=size(anthony_pass_3)(1)+size(anthony_fail_3)(1);
|
||||||
|
n_6_cycles=size(anthony_pass_6)(1)+size(anthony_fail_6)(1);
|
||||||
|
|
||||||
|
fail_rate_all_anthony=sum(anthony_fail_all(:,5))/n_all_cycles;
|
||||||
|
fail_rate_all_naive=sum(naive_fail_all(:,5))/n_all_cycles;
|
||||||
|
fail_rate_all_later=sum(later_fail_all(:,5))/n_all_cycles;
|
||||||
|
fail_rate_all_split=sum(split_fail_all(:,5))/n_all_cycles;
|
||||||
|
fail_rate_all_blocked=sum(blocked_fail_all(:,5))/n_all_cycles;
|
||||||
|
fail_rate_all_lapack=sum(lapack_fail_all(:,5))/n_all_cycles;
|
||||||
|
|
||||||
|
fail_rate_1_anthony=sum(anthony_fail_1(:,5))/n_1_cycles;
|
||||||
|
fail_rate_1_naive=sum(naive_fail_1(:,5))/n_1_cycles;
|
||||||
|
fail_rate_1_later=sum(later_fail_1(:,5))/n_1_cycles;
|
||||||
|
fail_rate_1_split=sum(split_fail_1(:,5))/n_1_cycles;
|
||||||
|
fail_rate_1_blocked=sum(blocked_fail_1(:,5))/n_1_cycles;
|
||||||
|
fail_rate_1_lapack=sum(lapack_fail_1(:,5))/n_1_cycles;
|
||||||
|
|
||||||
|
fail_rate_2_anthony=sum(anthony_fail_2(:,5))/n_2_cycles;
|
||||||
|
fail_rate_2_naive=sum(naive_fail_2(:,5))/n_2_cycles;
|
||||||
|
fail_rate_2_later=sum(later_fail_2(:,5))/n_2_cycles;
|
||||||
|
fail_rate_2_split=sum(split_fail_2(:,5))/n_2_cycles;
|
||||||
|
fail_rate_2_blocked=sum(blocked_fail_2(:,5))/n_2_cycles;
|
||||||
|
fail_rate_2_lapack=sum(lapack_fail_2(:,5))/n_2_cycles;
|
||||||
|
fail_rate_wb2=sum(data_wb2(:,5))/n_2_cycles;
|
||||||
|
|
||||||
|
fail_rate_3_anthony=sum(anthony_fail_3(:,5))/n_3_cycles;
|
||||||
|
fail_rate_3_naive=sum(naive_fail_3(:,5))/n_3_cycles;
|
||||||
|
fail_rate_3_later=sum(later_fail_3(:,5))/n_3_cycles;
|
||||||
|
fail_rate_3_split=sum(split_fail_3(:,5))/n_3_cycles;
|
||||||
|
fail_rate_3_blocked=sum(blocked_fail_3(:,5))/n_3_cycles;
|
||||||
|
fail_rate_3_lapack=sum(lapack_fail_3(:,5))/n_3_cycles;
|
||||||
|
fail_rate_wb3=sum(data_wb3(:,5))/n_3_cycles;
|
||||||
|
|
||||||
|
fail_rate_6_anthony=sum(anthony_fail_6(:,5))/n_6_cycles;
|
||||||
|
fail_rate_6_naive=sum(naive_fail_6(:,5))/n_6_cycles;
|
||||||
|
fail_rate_6_later=sum(later_fail_6(:,5))/n_6_cycles;
|
||||||
|
fail_rate_6_split=sum(split_fail_6(:,5))/n_6_cycles;
|
||||||
|
fail_rate_6_lapack=sum(lapack_fail_6(:,5))/n_6_cycles;
|
||||||
|
fail_rate_6_blocked=sum(blocked_fail_6(:,5))/n_6_cycles;
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
printf ("Fail rates for all (N=%d) cycles\n", n_all_cycles);
|
||||||
|
printf ("-------------------------------------------------------------------------------------------\n");
|
||||||
|
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_anthony*100, fail_rate_all_anthony, size(anthony_pass_all)(1), size(anthony_fail_all)(1), size(anthony_pass_all)(1)+size(anthony_fail_all)(1));
|
||||||
|
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_naive*100, fail_rate_all_naive, size(naive_pass_all)(1), size(naive_fail_all)(1), size(naive_pass_all)(1)+size(naive_fail_all)(1));
|
||||||
|
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_later*100, fail_rate_all_later, size(later_pass_all)(1), size(later_fail_all)(1), size(later_pass_all)(1)+size(later_fail_all)(1));
|
||||||
|
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_split*100, fail_rate_all_split, size(split_pass_all)(1), size(split_fail_all)(1), size(split_pass_all)(1)+size(split_fail_all)(1));
|
||||||
|
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_blocked*100, fail_rate_all_blocked, size(blocked_pass_all)(1), size(blocked_fail_all)(1), size(blocked_pass_all)(1)+size(blocked_fail_all)(1));
|
||||||
|
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_lapack*100, fail_rate_all_lapack, size(lapack_pass_all)(1), size(lapack_fail_all)(1), size(lapack_pass_all)(1)+size(lapack_fail_all)(1));
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
printf ("Fail rates for cycles containing 1 update (N=%d) (solely due to numerical noise)\n", n_1_cycles);
|
||||||
|
printf ("-------------------------------------------------------------------------------------------\n");
|
||||||
|
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_anthony*100, fail_rate_1_anthony, size(anthony_pass_1)(1), size(anthony_fail_1)(1), n_1_cycles);
|
||||||
|
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_naive*100, fail_rate_1_naive, size(naive_pass_1)(1), size(naive_fail_1)(1), size(naive_pass_1)(1)+size(naive_fail_1)(1));
|
||||||
|
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_later*100, fail_rate_1_later, size(later_pass_1)(1), size(later_fail_1)(1), size(later_pass_1)(1)+size(later_fail_1)(1));
|
||||||
|
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_split*100, fail_rate_1_split, size(split_pass_1)(1), size(split_fail_1)(1), size(split_pass_1)(1)+size(split_fail_1)(1));
|
||||||
|
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_blocked*100, fail_rate_1_blocked, size(blocked_pass_1)(1), size(blocked_fail_1)(1), size(blocked_pass_1)(1)+size(blocked_fail_1)(1));
|
||||||
|
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_lapack*100, fail_rate_1_lapack, size(lapack_pass_1)(1), size(lapack_fail_1)(1), size(lapack_pass_1)(1)+size(lapack_fail_1)(1));
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
printf ("Fail rates for cycles containing 2 updates (N=%d) (compare blocked w/ WB2)\n", n_2_cycles);
|
||||||
|
printf ("-------------------------------------------------------------------------------------------\n");
|
||||||
|
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_anthony*100, fail_rate_2_anthony, size(anthony_pass_2)(1), size(anthony_fail_2)(1), n_2_cycles);
|
||||||
|
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_naive*100, fail_rate_2_naive, size(naive_pass_2)(1), size(naive_fail_2)(1), size(naive_pass_2)(1)+size(naive_fail_2)(1));
|
||||||
|
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_later*100, fail_rate_2_later, size(later_pass_2)(1), size(later_fail_2)(1), size(later_pass_2)(1)+size(later_fail_2)(1));
|
||||||
|
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_split*100, fail_rate_2_split, size(split_pass_2)(1), size(split_fail_2)(1), size(split_pass_2)(1)+size(split_fail_2)(1));
|
||||||
|
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_blocked*100, fail_rate_2_blocked, size(blocked_pass_2)(1), size(blocked_fail_2)(1), size(blocked_pass_2)(1)+size(blocked_fail_2)(1));
|
||||||
|
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_lapack*100, fail_rate_2_lapack, size(lapack_pass_2)(1), size(lapack_fail_2)(1), size(lapack_pass_2)(1)+size(lapack_fail_2)(1));
|
||||||
|
printf ("Woodbury 2:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_wb2*100, fail_rate_wb2, size(wb2_pass_all)(1), size(wb2_fail_all)(1), size(wb2_pass_all)(1)+size(wb2_fail_all)(1));
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
printf ("Fail rates for cycles containing 3 updates (N=%d) (compare blocked w/ WB3)\n", n_3_cycles);
|
||||||
|
printf ("-------------------------------------------------------------------------------------------\n");
|
||||||
|
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_anthony*100, fail_rate_3_anthony, size(anthony_pass_3)(1), size(anthony_fail_3)(1), n_3_cycles);
|
||||||
|
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_naive*100, fail_rate_3_naive, size(naive_pass_3)(1), size(naive_fail_3)(1), size(naive_pass_3)(1)+size(naive_fail_3)(1));
|
||||||
|
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_later*100, fail_rate_3_later, size(later_pass_3)(1), size(later_fail_3)(1), size(later_pass_3)(1)+size(later_fail_3)(1));
|
||||||
|
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_split*100, fail_rate_3_split, size(split_pass_3)(1), size(split_fail_3)(1), size(split_pass_3)(1)+size(split_fail_3)(1));
|
||||||
|
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_blocked*100, fail_rate_3_blocked, size(blocked_pass_3)(1), size(blocked_fail_3)(1), size(blocked_pass_3)(1)+size(blocked_fail_3)(1));
|
||||||
|
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_lapack*100, fail_rate_3_lapack, size(lapack_pass_3)(1), size(lapack_fail_3)(1), size(lapack_pass_3)(1)+size(lapack_fail_3)(1));
|
||||||
|
printf ("Woodbury 3:\t%f (= %f x N cycles; %d pass + %d fail = %d tot.)\n", fail_rate_wb3*100, fail_rate_wb3, size(wb3_pass_all)(1), size(wb3_fail_all)(1), size(wb3_pass_all)(1)+size(wb3_fail_all)(1));
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
printf ("Fail rates for cycles containing 6 updates (N=%d) (blocked vs splitting in multiples of 3)\n", n_6_cycles);
|
||||||
|
printf ("-------------------------------------------------------------------------------------------\n");
|
||||||
|
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_anthony*100, fail_rate_6_anthony, size(anthony_pass_6)(1), size(anthony_fail_6)(1), n_6_cycles);
|
||||||
|
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_naive*100, fail_rate_6_naive, size(naive_pass_6)(1), size(naive_fail_6)(1), size(naive_pass_6)(1)+size(naive_fail_6)(1));
|
||||||
|
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_later*100, fail_rate_6_later, size(later_pass_6)(1), size(later_fail_6)(1), size(later_pass_6)(1)+size(later_fail_6)(1));
|
||||||
|
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_split*100, fail_rate_6_split, size(split_pass_6)(1), size(split_fail_6)(1), size(split_pass_6)(1)+size(split_fail_6)(1));
|
||||||
|
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_blocked*100, fail_rate_6_blocked, size(blocked_pass_6)(1), size(blocked_fail_6)(1), size(blocked_pass_6)(1)+size(blocked_fail_6)(1));
|
||||||
|
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_lapack*100, fail_rate_6_lapack, size(lapack_pass_6)(1), size(lapack_fail_6)(1), size(lapack_pass_6)(1)+size(lapack_fail_6)(1));
|
46
independent_test_harness/get_stats.m.old
Executable file
46
independent_test_harness/get_stats.m.old
Executable file
@ -0,0 +1,46 @@
|
|||||||
|
#! /bin/octave -qf
% Prints timing and failure statistics for the "anthony", "naive",
% "splitting" and "blocked" result files found in one data directory.
%
% Command-line arguments:
%   1. directory that holds the data files
%   2. file-name prefix (the "nr of updates"); the files read are
%      <dir>/<nr>_anthony.dat, <dir>/<nr>_naive.dat,
%      <dir>/<nr>_splitting.dat and <dir>/<nr>_blocked.dat
%
% Statements without a trailing semicolon are left that way on purpose:
% Octave echoes "name = value" for them, which is this script's output.

if (nargin < 2)
  % Report the name the script was really started under instead of a
  % hard-coded (and stale) file name.
  printf ("Call with: %s ⟨path/to/data/files⟩ ⟨nr of updates⟩\n", program_invocation_name ());
  return;
endif
arg_list = argv ();
INDIR = arg_list{1};
INNR = arg_list{2};

FILE_ANTHONY = [INDIR "/" INNR "_anthony.dat"];
FILE_NAIVE = [INDIR "/" INNR "_naive.dat"];
FILE_SPLIT = [INDIR "/" INNR "_splitting.dat"];
FILE_BLOCK = [INDIR "/" INNR "_blocked.dat"];

data_antho = dlmread (FILE_ANTHONY);
data_naive = dlmread (FILE_NAIVE);
data_split = dlmread (FILE_SPLIT);
data_block = dlmread (FILE_BLOCK);

printf ("\n");
% The first and the last row of every file are excluded from all statistics
% (presumably a header and a trailer line -- confirm against the writer).
n_cycles = size(data_antho)(1)-2
printf ("\n");

rows = 2:n_cycles+1;   % rows that take part in the statistics

% Column 10: presumably CPU cycles per update, going by the variable names.
average_cpucycls_p_upd_antho = mean(data_antho(rows,10))
average_cpucycls_p_upd_naive = mean(data_naive(rows,10))
average_cpucycls_p_upd_split = mean(data_split(rows,10))
average_cpucycls_p_upd_block = mean(data_block(rows,10))
printf ("\n");

std_cpucycls_p_upd_antho = std(data_antho(rows,10))
std_cpucycls_p_upd_naive = std(data_naive(rows,10))
std_cpucycls_p_upd_split = std(data_split(rows,10))
std_cpucycls_p_upd_block = std(data_block(rows,10))
printf ("\n");

% Mean cost of each kernel relative to the "anthony" kernel.
factor_naive = average_cpucycls_p_upd_naive / average_cpucycls_p_upd_antho
factor_split = average_cpucycls_p_upd_split / average_cpucycls_p_upd_antho
factor_block = average_cpucycls_p_upd_block / average_cpucycls_p_upd_antho
printf ("\n");

% Column 5 is summed and divided by the number of cycles to get a fail rate.
fail_rate_antho = sum( data_antho(rows, 5) ) / n_cycles
fail_rate_naive = sum( data_naive(rows, 5) ) / n_cycles
fail_rate_split = sum( data_split(rows, 5) ) / n_cycles
fail_rate_block = sum( data_block(rows, 5) ) / n_cycles
|
||||||
|
|
56
independent_test_harness/kernels.h
Normal file
56
independent_test_harness/kernels.h
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
/*
 * Prototypes of the update kernels exercised by the test harness.
 *
 * The uint32_t-returning kernels report a status code; the harness treats any
 * non-zero value as failure.
 */
#ifndef KERNELS_H
#define KERNELS_H

#include <stdint.h> /* uint32_t / uint64_t used in every prototype below */

#include <mkl_lapacke.h>

lapack_int inverse(double *A, uint64_t Dim, uint64_t LDS);

uint32_t qmckl_sherman_morrison(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);

uint32_t qmckl_sherman_morrison_splitting(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);

uint32_t qmckl_sherman_morrison_smw32s(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);

/* No N_updates parameter: presumably fixed at three updates -- confirm. */
uint32_t qmckl_woodbury_3(
    const uint64_t vLDS, const uint64_t vDim,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);

/* No N_updates parameter: presumably fixed at two updates -- confirm. */
uint32_t qmckl_woodbury_2(
    const uint64_t vLDS, const uint64_t vDim,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);

/* NOTE: takes (Dim, LDS) -- the opposite order of the kernels above. */
void detupd(const uint64_t Dim, const uint64_t LDS,
            const double *__restrict __attribute__((aligned(8))) Updates,
            const uint64_t *__restrict Updates_index,
            double *__restrict __attribute__((aligned(8))) Slater_inv,
            double *__restrict determinant);

uint32_t qmckl_sherman_morrison_later(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant);

#endif /* KERNELS_H */
|
||||||
|
|
168
independent_test_harness/meuk.c
Normal file
168
independent_test_harness/meuk.c
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
#include "meuk.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
/*
 * Write the first LDS * Dim entries of A to stdout, one "%f"-formatted value
 * per line, and finish with one empty line.
 */
void print_matrix(double *A, const uint64_t LDS, const uint64_t Dim) {
  const uint64_t n_entries = LDS * Dim;
  const double *cursor = A;

  for (uint64_t remaining = n_entries; remaining > 0; remaining--) {
    printf("%f\n", *cursor);
    cursor++;
  }
  printf("\n");
}
|
||||||
|
|
||||||
|
/*
 * Sum of the squares of the first LDS * Dim entries of A, i.e. the squared
 * Frobenius norm of the whole buffer (every one of the LDS * Dim entries is
 * included). Returns 0 when LDS * Dim is 0.
 */
double frobenius_norm2(double *A, const uint64_t LDS, const uint64_t Dim) {
  const uint64_t n_entries = LDS * Dim;
  double total = 0;
  uint64_t pos = 0;

  while (pos < n_entries) {
    total += A[pos] * A[pos];
    pos++;
  }
  return total;
}
|
||||||
|
|
||||||
|
/*
 * Frobenius norm of the LDS * Dim entries of A: the square root of
 * frobenius_norm2() over the same buffer.
 */
double frobenius_norm(double *A, const uint64_t LDS, const uint64_t Dim) {
  return sqrt(frobenius_norm2(A, LDS, Dim));
}
|
||||||
|
|
||||||
|
/*
 * Largest absolute value among the first LDS * Dim entries of A.
 *
 * Returns -1.0 as soon as a NaN entry is met, so a negative result signals
 * "NaN present". Returns 0 for an empty buffer.
 */
double max_norm(double *A, const uint64_t LDS, const uint64_t Dim) {
  const uint64_t n_entries = LDS * Dim;
  double peak = 0;

  for (uint64_t pos = 0; pos < n_entries; pos++) {
    const double value = A[pos];
    if (value != value) return -1.0; // only NaN compares unequal to itself
    const double magnitude = fabs(value);
    peak = (magnitude > peak) ? magnitude : peak;
  }
  return peak;
}
|
||||||
|
|
||||||
|
/*
 * Product of the absolute Frobenius norms of A and Ainv, each taken over
 * LDS * Dim entries.
 */
double condition_number(double *A, double *Ainv, const uint64_t LDS, const uint64_t Dim) {
  const double abs_norm_A = fabs(frobenius_norm(A, LDS, Dim));
  const double abs_norm_Ainv = fabs(frobenius_norm(Ainv, LDS, Dim));
  return abs_norm_A * abs_norm_Ainv;
}
|
||||||
|
|
||||||
|
/*
 * Read the whole HDF5 dataset named `key` from `file_id` into `data`.
 *
 * The destination is a uint64_t buffer, so the memory type passed to H5Dread
 * is H5T_NATIVE_UINT64. (H5T_NATIVE_ULONG describes `unsigned long`, which is
 * only 32 bits wide on LLP64 platforms and would not match the buffer there.)
 *
 * `data` must be large enough for the dataset; no size is checked here.
 * Failures of the HDF5 calls are caught by assert() only, so they go
 * unnoticed when the file is compiled with NDEBUG.
 */
void read_uint(hid_t file_id, const char *key, uint64_t *data) {
  herr_t rc;
  hid_t dataset_id = H5Dopen2(file_id, key, H5P_DEFAULT);
  assert(dataset_id >= 0 && "H5Dopen2");
  rc = H5Dread(dataset_id, H5T_NATIVE_UINT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
  assert(rc >= 0 && "H5Dread");
  rc = H5Dclose(dataset_id);
  assert(rc >= 0 && "H5Dclose");
  (void)rc; // keeps NDEBUG builds free of "set but not used" warnings
}
|
||||||
|
|
||||||
|
/*
 * Read the whole HDF5 dataset named `key` from `file_id` into `data` as
 * native doubles.
 *
 * `data` must be large enough for the dataset; no size is checked here.
 * Each HDF5 call is verified with assert() only.
 */
void read_double(hid_t file_id, const char *key, double *data) {
  const hid_t dataset_id = H5Dopen2(file_id, key, H5P_DEFAULT);
  assert(dataset_id >= 0 && "H5Dopen2");

  herr_t rc = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
  assert(rc >= 0 && "H5Dread");

  rc = H5Dclose(dataset_id);
  assert(rc >= 0 && "H5Dclose");
}
|
||||||
|
|
||||||
|
/*
 * Add every update vector to the Slater matrix in place.
 *
 * Update i is the run of Dim values starting at Updates[i * LDS]. It is added
 * element-wise to the run of Dim values starting at
 * Slater[(Updates_index[i] - 1) * Dim]; Updates_index is therefore 1-based.
 *
 * Indices and counters are 64-bit like the bounds they are compared with, so
 * nothing is truncated. Entries of Updates_index must be >= 1.
 */
void update_slater_matrix(const uint64_t LDS, const uint64_t Dim,
                          const uint64_t N_updates, const double *Updates,
                          const uint64_t *Updates_index, double *Slater) {
  for (uint64_t i = 0; i < N_updates; i++) {
    const uint64_t col = Updates_index[i] - 1; // 1-based -> 0-based
    const double *update = &Updates[i * LDS];
    double *target = &Slater[col * Dim];
    for (uint64_t j = 0; j < Dim; j++) {
      target[j] += update[j];
    }
  }
}
|
||||||
|
|
||||||
|
/*
 * Check that the product of Slater and Slater_invT is the identity matrix
 * within `tolerance`.
 *
 * Element (i, j) of the product is
 *     sum over k of Slater[i * Dim + k] * Slater_invT[k * LDS + j],
 * so Slater is read with row stride Dim and Slater_invT with row stride LDS.
 *
 * Returns 0 when every diagonal element is within `tolerance` of 1 and every
 * off-diagonal element is within `tolerance` of 0. Returns 1 when an element
 * is out of tolerance or is NaN.
 *
 * Each element is checked as soon as it is computed, so no Dim x Dim
 * temporary is needed (a stack array of that size overflows the stack for
 * large Dim) and the function stops at the first bad element.
 */
uint32_t check_error(const uint64_t LDS, const uint64_t Dim, double *Slater_invT,
                     double *Slater, const double tolerance) {
  for (uint64_t i = 0; i < Dim; i++) {
    for (uint64_t j = 0; j < Dim; j++) {
      double elm = 0;
      for (uint64_t k = 0; k < Dim; k++) {
        elm += Slater[i * Dim + k] * Slater_invT[k * LDS + j];
      }

      if (elm != elm) return 1; // found a NaN!
      if (i == j && fabs(elm - 1.0) > tolerance) return 1;
      if (i != j && fabs(elm) > tolerance) return 1;
    }
  }

  return 0;
}
|
||||||
|
|
||||||
|
/*
 * Dim x Dim matrix product: prod = a * b.
 *
 * `a` and `prod` are addressed with row stride Dim, `b` with row stride LDS.
 * Each output element is zeroed and then accumulated directly in `prod`.
 */
void matmul(double *a, double *b, double *prod, const uint64_t LDS, const uint64_t Dim) {
  for (uint32_t row = 0; row < Dim; row++) {
    double *out_row = prod + row * Dim;
    double *a_row = a + row * Dim;

    for (uint32_t col = 0; col < Dim; col++) {
      out_row[col] = 0;
      for (uint32_t k = 0; k < Dim; k++) {
        out_row[col] += a_row[k] * b[k * LDS + col];
      }
    }
  }
}
|
||||||
|
|
||||||
|
/*
 * Classify a maximum residual against a tolerance.
 *
 * Returns -1 when `max` is negative (the NaN marker used by max_norm in this
 * file), 0 when `max` is strictly below `tolerance`, and 1 otherwise.
 */
int32_t check_error_better(const double max, const double tolerance) {
  if (max < 0) {
    return -1; // When max was a NaN
  }
  return (max < tolerance) ? 0 : 1; // 0: good, 1: too big
}
|
||||||
|
|
||||||
|
/*
 * res = a - I for Dim x Dim matrices stored with row stride Dim:
 * 1.0 is subtracted from every diagonal element, all other elements are
 * copied unchanged.
 */
void residual(double *a, double *res, const uint64_t Dim) {
  for (uint32_t row = 0; row < Dim; row++) {
    for (uint32_t col = 0; col < Dim; col++) {
      const uint64_t at = row * Dim + col;
      double value = a[at];
      if (row == col) value = value - 1.0;
      res[at] = value;
    }
  }
}
|
||||||
|
|
||||||
|
/*
 * Run one update kernel, selected by the first character of version, and
 * verify the result.
 *
 *   'a'  detupd applied once per update ("Anthony")
 *   'n'  qmckl_sherman_morrison ("Naive")
 *   's'  qmckl_sherman_morrison_splitting ("Splitting")
 *   'b'  qmckl_sherman_morrison_smw32s ("Blocked")
 *
 * After the kernel has updated Slater_inv (and *determinant), the same
 * updates are applied to Slater with update_slater_matrix and the pair is
 * validated with check_error.
 *
 * Returns the result of check_error (0 on success). A kernel failure is
 * reported on stdout but its return code is superseded by the check_error
 * result. For any other version character nothing is done and 0 is returned.
 */
uint32_t test_kernel(char *version, const uint64_t LDS, const uint64_t Dim,
                     const uint64_t N_updates, const double *Updates,
                     const uint64_t *Updates_index, const double breakdown, const double tolerance,
                     double *Slater, double *Slater_inv, double *determinant) {
  uint32_t rc = 0;

  switch (version[0]) {
  case 'a': { // Anthony
    // detupd is run on the inverse scaled by the determinant; the scaling is
    // undone with the updated determinant afterwards.
    for (uint64_t i = 0; i < LDS * Dim; i++) Slater_inv[i] *= *determinant;
    for (uint64_t j = 0; j < N_updates; j++) {
      const double *Upds = &Updates[j * LDS];
      const uint64_t *Ui = &Updates_index[j];
      detupd(Dim, LDS, Upds, Ui, Slater_inv, determinant);
      // Test the determinant value, not the (never-null) pointer.
      if (*determinant == 0) printf("TEST_KERNEL: det_update21 failed\n");
    }
    for (uint64_t i = 0; i < LDS * Dim; i++) Slater_inv[i] /= *determinant;
    break;
  }
  case 'n': // Naive
    rc = qmckl_sherman_morrison(LDS, Dim, N_updates, Updates, Updates_index,
                                breakdown, Slater_inv, determinant);
    if (rc != 0) printf("TEST_KERNEL: qmckl_sherman_morrison failed\n");
    break;
  case 's': // Splitting
    rc = qmckl_sherman_morrison_splitting(LDS, Dim, N_updates, Updates,
                                          Updates_index, breakdown, Slater_inv,
                                          determinant);
    if (rc != 0) printf("TEST_KERNEL: qmckl_sherman_morrison_splitting failed\n");
    break;
  case 'b': // Blocked
    rc = qmckl_sherman_morrison_smw32s(LDS, Dim, N_updates, Updates,
                                       Updates_index, breakdown, Slater_inv,
                                       determinant);
    if (rc != 0) printf("TEST_KERNEL: qmckl_sherman_morrison_smw32s failed\n");
    break;
  default:
    return rc;
  }

  // Common verification. check_error takes the inverse first and the Slater
  // matrix second; every kernel must use that same order.
  update_slater_matrix(LDS, Dim, N_updates, Updates, Updates_index, Slater);
  rc = check_error(LDS, Dim, Slater_inv, Slater, tolerance);
  if (rc != 0) printf("TEST_KERNEL: check_error failed\n");

  return rc;
}
|
43
independent_test_harness/meuk.h
Normal file
43
independent_test_harness/meuk.h
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
#include <math.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "hdf5.h"
|
||||||
|
#include "kernels.h"
|
||||||
|
|
||||||
|
/*
 * Pair of status values.
 * NOTE(review): the meaning of the two fields is not visible here;
 * presumably rc is a kernel return code and error an error counter/flag --
 * confirm against the code that fills this struct.
 */
struct Error {
  uint32_t rc;
  uint64_t error;
};
typedef struct Error Error;
|
||||||
|
|
||||||
|
void matmul(double *a, double *b, double *prod, const uint64_t LDS, const uint64_t Dim);
|
||||||
|
void residual(double *a, double *res, const uint64_t Dim);
|
||||||
|
double frobenius_norm2(double *A, const uint64_t LDS, const uint64_t Dim);
|
||||||
|
void print_matrix(double *A, const uint64_t LDS, const uint64_t Dim);
|
||||||
|
double frobenius_norm(double *A, const uint64_t LDS, const uint64_t Dim);
|
||||||
|
double max_norm(double *A, const uint64_t LDS, const uint64_t Dim);
|
||||||
|
double condition_number(double *A, double *Ainv, const uint64_t LDS, const uint64_t Dim);
|
||||||
|
void read_uint(hid_t file_id, const char *key, uint64_t *data);
|
||||||
|
void read_double(hid_t file_id, const char *key, double *data);
|
||||||
|
|
||||||
|
/*
 * Read the x86 time-stamp counter with the RDTSC instruction and return the
 * 64-bit value assembled from EDX (upper half) and EAX (lower half).
 */
static __inline__ uint64_t rdtsc(void) {
  unsigned low_bits, high_bits;
  __asm__ __volatile__("rdtsc" : "=a"(low_bits), "=d"(high_bits));
  const unsigned long long upper = ((unsigned long long)high_bits) << 32;
  return upper | ((unsigned long long)low_bits);
}
|
||||||
|
|
||||||
|
void update_slater_matrix(const uint64_t LDS, const uint64_t Dim,
|
||||||
|
const uint64_t N_updates, const double *Updates,
|
||||||
|
const uint64_t *Updates_index, double *Slater);
|
||||||
|
|
||||||
|
uint32_t check_error(const uint64_t LDS, const uint64_t Dim, double *Slater_invT,
|
||||||
|
double *Slater, const double tolerance);
|
||||||
|
|
||||||
|
int32_t check_error_better(const double max, const double tolerance);
|
||||||
|
|
||||||
|
uint32_t test_kernel(char *version, const uint64_t LDS, const uint64_t Dim,
|
||||||
|
const uint64_t N_updates, const double *Updates,
|
||||||
|
const uint64_t *Updates_index, const double breakdown, const double tolerance,
|
||||||
|
double *Slater, double *Slater_inv, double *determinant);
|
33
independent_test_harness/run_tests.sh
Executable file
33
independent_test_harness/run_tests.sh
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#!/bin/bash
# Build the test harness against each cycle list and run every kernel
# variant, writing one .dat file per run into the output directory.
#
# Usage: ./run_tests.sh OUTDIR

if [ "$#" -ne 1 ] || [ -z "$1" ]; then
  echo "Usage: $0 OUTDIR" >&2
  exit 1
fi

INDIR="cycles_329_dets"
# INDIR="cycles_15784_dets"
OUTDIR=$1

# Point cycles.h at the given cycle list and rebuild ./test from scratch.
# Abort if the build fails so that no stale or missing binary is run.
rebuild_with() {
  ln -svf "${INDIR}/$1" cycles.h
  make clean && make || { echo "Build failed for $1" >&2; exit 1; }
}

mkdir -pv "${OUTDIR}" || exit 1
cp -av get_stats.m "${OUTDIR}"

## All cycles
rebuild_with all_cycles.h
./test a > "${OUTDIR}/ANTHONY.dat"
./test n > "${OUTDIR}/NAIVE.dat"
./test l > "${OUTDIR}/LATER.dat"
./test s > "${OUTDIR}/SPLITTING.dat"
./test b > "${OUTDIR}/BLOCKED.dat"
./test m > "${OUTDIR}/MKL_LAPACK.dat"

## Cycles w/ 2 upds excl. w/ WB2
rebuild_with 2_cycles.h
./test 2 > "${OUTDIR}/WB2.dat"

## Cycles w/ 3 upds excl. w/ WB3
rebuild_with 3_cycles.h
./test 3 > "${OUTDIR}/WB3.dat"

make clean
rm cycles.h

(cd "${OUTDIR}" && ./get_stats.m)
|
632
independent_test_harness/sm.c
Normal file
632
independent_test_harness/sm.c
Normal file
@ -0,0 +1,632 @@
|
|||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "kernels.h"
|
||||||
|
|
||||||
|
extern uint64_t n_splits;
|
||||||
|
extern uint64_t block_fail;
|
||||||
|
extern uint64_t recursive_calls;
|
||||||
|
|
||||||
|
/*
 * Naive Sherman-Morrison: apply N_updates rank-1 updates, one after the
 * other, to the stored inverse Slater_inv.
 *
 * The matrix size is fixed inside this kernel (Dim = 21 rows, row stride
 * LDS = 24); the vLDS and vDim arguments are accepted but not used.
 *
 *   Updates        N_updates update vectors, each LDS doubles long
 *   Updates_index  1-based index of the row each update belongs to
 *   breakdown      smallest acceptable |denominator|
 *   Slater_inv     Dim x LDS inverse, updated in place
 *   determinant    if non-NULL, multiplied by each update's denominator
 *
 * Returns 0 on success, or 1 as soon as a denominator is smaller in
 * magnitude than breakdown. Updates applied before the failure are kept.
 */
uint32_t qmckl_sherman_morrison(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant) {

  (void)vLDS;
  (void)vDim;
  const uint32_t Dim = 21;
  const uint32_t LDS = 24;

  double __attribute__((aligned(8))) C[Dim];
  double __attribute__((aligned(8))) D[LDS];

  uint32_t l = 0;
  // For each update
  while (l < N_updates) {
    // C = S^{-1} x u_l
    for (uint32_t i = 0; i < Dim; i++) {
      C[i] = 0.0;
#pragma ivdep
#pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        // Regular mat-vec product, but actually working on S_inv^T * U_l.
        C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
      }
    }

    // Denominator: v_l^T * C
    const int cui = Updates_index[l] - 1;
    double den = 1.0 + C[cui];

    if (fabs(den) < breakdown) {
      return 1;
    }
    double iden = 1.0 / den;

    // Update det(A), but only when the caller supplied a determinant.
    if (determinant != NULL)
      *determinant *= den;

    // D = v_l^T * S_inv: the row of the inverse selected by this update.
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t j = 0; j < LDS; j++) {
      D[j] = Slater_inv[cui * LDS + j];
    }

    // A^{-1} = A^{-1} - C x D / den
    for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep
#pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        const double update = C[i] * D[j] * iden;
        Slater_inv[i * LDS + j] -= update;
      }
    }
    l += 1;
  }
  return 0;
}
|
||||||
|
|
||||||
|
/*
 * Woodbury 2x2 kernel: apply two rank-1 updates to Slater_inv in one step.
 *
 * Computes S^{-1} - C B^{-1} D in place, with
 *   C := S^{-1} U      : Dim x 2
 *   B := 1 + V C       : 2 x 2
 *   D := V S^{-1}      : 2 x LDS  (the two rows named by Updates_index)
 *
 * The size is fixed inside the kernel (Dim = 21, LDS = 24); vLDS and vDim are
 * not used. Updates holds two vectors of LDS doubles, Updates_index their
 * 1-based row numbers.
 *
 * Returns 1, leaving Slater_inv untouched, when |det(B)| < breakdown;
 * otherwise returns 0 and, if determinant is non-NULL, multiplies
 * *determinant by det(B).
 */
uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
                          const double *__restrict __attribute__((aligned(8)))
                          Updates,
                          const uint64_t *__restrict Updates_index,
                          const double breakdown,
                          double *__restrict __attribute__((aligned(8)))
                          Slater_inv,
                          double *__restrict determinant) {
  const uint32_t LDS = 24;
  const uint32_t Dim = 21;

  const uint32_t first_row = (Updates_index[0] - 1);
  const uint32_t second_row = (Updates_index[1] - 1);

  // C = (S^T)^{-1} U : Dim x 2, stored row by row (two entries per row).
  double __attribute__((aligned(8))) C[2 * Dim];
  for (uint32_t i = 0; i < Dim; i++) {
    const double *inv_row = &Slater_inv[i * LDS];
    double *c_row = &C[i * 2];
    c_row[0] = 0;
    c_row[1] = 0;
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t k = 0; k < LDS; k++) {
      c_row[0] += inv_row[k] * Updates[k];
      c_row[1] += inv_row[k] * Updates[LDS + k];
    }
  }

  // B = 1 + V C : 2 x 2
  const double b00 = C[first_row * 2] + 1;
  const double b01 = C[first_row * 2 + 1];
  const double b10 = C[second_row * 2];
  const double b11 = C[second_row * 2 + 1] + 1;

  // Refuse to invert B when it is (close to) singular.
  double det = b00 * b11 - b01 * b10;
  if (fabs(det) < breakdown) {
    return 1;
  }

  // Update det(S) when passed
  if (determinant != NULL) {
    *determinant *= det;
  }

  // B^{-1} from the closed-form 2 x 2 inverse.
  const double idet = 1.0 / det;
  double __attribute__((aligned(8))) Binv[4];
  Binv[0] = idet * b11;
  Binv[1] = -1.0 * idet * b01;
  Binv[2] = -1.0 * idet * b10;
  Binv[3] = idet * b00;

  // tmp = B^{-1} D : 2 x LDS. Built completely before Slater_inv is modified,
  // because D consists of two of its rows.
  double __attribute__((aligned(8))) tmp[2 * LDS];
  const double *d_first = &(Slater_inv[first_row * LDS]);
  const double *d_second = &(Slater_inv[second_row * LDS]);
#pragma ivdep
#pragma vector aligned, novecremainder
  for (uint32_t j = 0; j < LDS; j++) {
    tmp[j] = Binv[0] * d_first[j] + Binv[1] * d_second[j];
    tmp[LDS + j] = Binv[2] * d_first[j] + Binv[3] * d_second[j];
  }

  // (S^T)^{-1} -= C * tmp : Dim x LDS
  for (uint32_t i = 0; i < Dim; i++) {
    double *inv_row = &Slater_inv[i * LDS];
    const double c0 = C[i * 2];
    const double c1 = C[i * 2 + 1];
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t j = 0; j < LDS; j++) {
      inv_row[j] -= c0 * tmp[j];
      inv_row[j] -= c1 * tmp[LDS + j];
    }
  }

  return 0;
}
|
||||||
|
|
||||||
|
/*
 * Woodbury 3x3 kernel: apply three rank-1 updates to Slater_inv in one step.
 *
 * Computes (S^T)^{-1} - C B^{-1} D in place, with
 *   C := (S^T)^{-1} U  : Dim x 3
 *   B := 1 + V C       : 3 x 3
 *   D := V (S^T)^{-1}  : 3 x LDS  (the three rows named by Updates_index)
 *
 * The size is fixed inside the kernel (Dim = 21, LDS = 24); vLDS and vDim are
 * not used. Updates holds three vectors of LDS doubles, Updates_index their
 * 1-based row numbers.
 *
 * Returns 1, leaving Slater_inv untouched, when |det(B)| < breakdown;
 * otherwise returns 0 and, if determinant is non-NULL, multiplies
 * *determinant by det(B).
 */
uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
                          const double *__restrict __attribute__((aligned(8)))
                          Updates,
                          const uint64_t *__restrict Updates_index,
                          const double breakdown,
                          double *__restrict __attribute__((aligned(8)))
                          Slater_inv,
                          double *__restrict determinant) {
  const uint32_t LDS = 24;
  const uint32_t Dim = 21;

  const uint32_t first_row = (Updates_index[0] - 1);
  const uint32_t second_row = (Updates_index[1] - 1);
  const uint32_t third_row = (Updates_index[2] - 1);

  // C = (S^T)^{-1} U : Dim x 3, stored row by row (three entries per row).
  double __attribute__((aligned(8))) C[3 * Dim];
  for (uint32_t i = 0; i < Dim; i++) {
    const double *inv_row = &Slater_inv[i * LDS];
    double *c_row = &C[i * 3];
    c_row[0] = 0;
    c_row[1] = 0;
    c_row[2] = 0;
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t k = 0; k < LDS; k++) {
      c_row[0] += inv_row[k] * Updates[k];
      c_row[1] += inv_row[k] * Updates[LDS + k];
      c_row[2] += inv_row[k] * Updates[2 * LDS + k];
    }
  }

  // B = 1 + V C : 3 x 3, row-major B0..B8.
  const double B0 = C[first_row * 3] + 1;
  const double B1 = C[first_row * 3 + 1];
  const double B2 = C[first_row * 3 + 2];
  const double B3 = C[second_row * 3];
  const double B4 = C[second_row * 3 + 1] + 1;
  const double B5 = C[second_row * 3 + 2];
  const double B6 = C[third_row * 3];
  const double B7 = C[third_row * 3 + 1];
  const double B8 = C[third_row * 3 + 2] + 1;

  // Refuse to invert B when its determinant is too close to zero.
  double det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
               B2 * (B3 * B7 - B4 * B6);
  if (fabs(det) < breakdown) {
    return 1;
  }

  // Update det(Slater) if passed
  if (determinant != NULL) {
    *determinant *= det;
  }

  // B^{-1} from the closed-form (adjugate) 3 x 3 inverse.
  const double idet = 1.0 / det;
  double __attribute__((aligned(8))) Binv[9];
  Binv[0] = (B4 * B8 - B7 * B5) * idet;
  Binv[1] = -(B1 * B8 - B7 * B2) * idet;
  Binv[2] = (B1 * B5 - B4 * B2) * idet;
  Binv[3] = -(B3 * B8 - B6 * B5) * idet;
  Binv[4] = (B0 * B8 - B6 * B2) * idet;
  Binv[5] = -(B0 * B5 - B3 * B2) * idet;
  Binv[6] = (B3 * B7 - B6 * B4) * idet;
  Binv[7] = -(B0 * B7 - B6 * B1) * idet;
  Binv[8] = (B0 * B4 - B3 * B1) * idet;

  // tmp = B^{-1} D : 3 x LDS. Built completely before Slater_inv is modified,
  // because D consists of three of its rows.
  double __attribute__((aligned(8))) tmp[3 * LDS];
  const double *d_first = &(Slater_inv[first_row * LDS]);
  const double *d_second = &(Slater_inv[second_row * LDS]);
  const double *d_third = &(Slater_inv[third_row * LDS]);
#pragma ivdep
#pragma vector aligned, novecremainder
  for (uint32_t j = 0; j < LDS; j++) {
    tmp[j] = Binv[0] * d_first[j] + Binv[1] * d_second[j] + Binv[2] * d_third[j];
    tmp[LDS + j] = Binv[3] * d_first[j] + Binv[4] * d_second[j] + Binv[5] * d_third[j];
    tmp[2 * LDS + j] = Binv[6] * d_first[j] + Binv[7] * d_second[j] + Binv[8] * d_third[j];
  }

  // (S^T)^{-1} -= C * tmp : Dim x LDS
  for (uint32_t i = 0; i < Dim; i++) {
    double *inv_row = &Slater_inv[i * LDS];
    const double c0 = C[i * 3];
    const double c1 = C[i * 3 + 1];
    const double c2 = C[i * 3 + 2];
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t j = 0; j < LDS; j++) {
      inv_row[j] -= c0 * tmp[j];
      inv_row[j] -= c1 * tmp[LDS + j];
      inv_row[j] -= c2 * tmp[2 * LDS + j];
    }
  }

  return 0;
}
|
||||||
|
|
||||||
|
uint32_t qmckl_slagel_splitting(
|
||||||
|
const uint64_t vLDS, const uint64_t vDim, uint64_t N_updates,
|
||||||
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
||||||
|
const uint64_t *__restrict Updates_index, const double breakdown,
|
||||||
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
||||||
|
double *__restrict __attribute__((aligned(8))) later_updates,
|
||||||
|
uint64_t *__restrict later_index, uint64_t *__restrict later,
|
||||||
|
double *__restrict determinant) {
|
||||||
|
|
||||||
|
const uint32_t LDS = 24;
|
||||||
|
const uint32_t Dim = 21;
|
||||||
|
|
||||||
|
// double __attribute__((aligned(8))) C[N_updates * Dim];
|
||||||
|
double __attribute__((aligned(8))) C[Dim];
|
||||||
|
double __attribute__((aligned(8))) D[LDS];
|
||||||
|
|
||||||
|
uint32_t l = 0;
|
||||||
|
// For each update
|
||||||
|
while (l < N_updates) {
|
||||||
|
// C = S^{-1} x U_l
|
||||||
|
for (uint32_t i = 0; i < Dim; i++) {
|
||||||
|
C[i] = 0.0;
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
for (uint32_t j = 0; j < LDS; j++) {
|
||||||
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Denominator
|
||||||
|
const int cui = Updates_index[l] - 1;
|
||||||
|
double den = 1.0 + C[cui];
|
||||||
|
// printf("test breakdown = %f, den = %f, C[cui] = %f, cui = %d\n", breakdown, fabs(den), C[cui], cui);
|
||||||
|
if (fabs(den) < breakdown) { // Here is decided to split the update, or not.
|
||||||
|
// printf("Split! breakdown = %f\n", breakdown);
|
||||||
|
n_splits += 1;
|
||||||
|
|
||||||
|
// U_l = U_l / 2: split the update in 2 equal halves and save the second halve
|
||||||
|
// in later_updates
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
for (uint32_t i = 0; i < LDS; i++) {
|
||||||
|
later_updates[*later * LDS + i] = Updates[l * LDS + i] / 2.0;
|
||||||
|
C[i] /= 2.0;
|
||||||
|
}
|
||||||
|
later_index[*later] = Updates_index[l];
|
||||||
|
(*later)++;
|
||||||
|
|
||||||
|
den = 1.0 + C[cui];
|
||||||
|
} // From here onwards we continue with applying the first halve of the update to Slater_inv
|
||||||
|
double iden = 1.0 / den;
|
||||||
|
|
||||||
|
if (!determinant) *determinant *= den;
|
||||||
|
|
||||||
|
// D = v^T x S^{-1} : 1 x LDS
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
for (uint32_t j = 0; j < LDS; j++) {
|
||||||
|
D[j] = Slater_inv[cui * LDS + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// S^{-1} = S^{-1} - C x D / den
|
||||||
|
for (uint32_t i = 0; i < Dim; i++) {
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
for (uint32_t j = 0; j < LDS; j++) {
|
||||||
|
const double update = C[i] * D[j] * iden;
|
||||||
|
Slater_inv[i * LDS + j] -= update;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t qmckl_sherman_morrison_splitting(
|
||||||
|
const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
|
||||||
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
||||||
|
const uint64_t *__restrict Updates_index, const double breakdown,
|
||||||
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
||||||
|
double *__restrict determinant) {
|
||||||
|
|
||||||
|
const uint32_t Dim = 21;
|
||||||
|
const uint32_t LDS = 24;
|
||||||
|
|
||||||
|
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
||||||
|
uint64_t later_index[N_updates];
|
||||||
|
uint64_t later = 0;
|
||||||
|
uint32_t rc;
|
||||||
|
|
||||||
|
rc = qmckl_slagel_splitting(LDS, Dim, N_updates, Updates, Updates_index,
|
||||||
|
breakdown, Slater_inv, later_updates, later_index,
|
||||||
|
&later, determinant);
|
||||||
|
// if (rc != 0) printf("Something when catastrophically wrong in QMCKL_SLAGEL_SPLITTING\n");
|
||||||
|
|
||||||
|
if (later > 0) {
|
||||||
|
recursive_calls++;
|
||||||
|
// printf("Later > 0\n");
|
||||||
|
rc = qmckl_sherman_morrison_splitting(LDS, Dim, later, later_updates,
|
||||||
|
later_index, breakdown, Slater_inv,
|
||||||
|
determinant);
|
||||||
|
// if (rc != 0) printf("Something when catastrophically wrong in QMCKL_SHERMAN_MORRISON_SPLITTING\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t qmckl_sherman_morrison_smw32s(
|
||||||
|
const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
|
||||||
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
||||||
|
const uint64_t *__restrict Updates_index, const double breakdown,
|
||||||
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
||||||
|
double *__restrict determinant) {
|
||||||
|
|
||||||
|
const uint32_t Dim = 21;
|
||||||
|
const uint32_t LDS = 24;
|
||||||
|
|
||||||
|
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
||||||
|
uint64_t later_index[N_updates];
|
||||||
|
uint64_t later = 0;
|
||||||
|
uint32_t rc;
|
||||||
|
|
||||||
|
if (N_updates == 4) { // Special case for 4 rank-1 updates: 2+2
|
||||||
|
rc = qmckl_woodbury_2(LDS, Dim, Updates, Updates_index,
|
||||||
|
breakdown, Slater_inv, determinant);
|
||||||
|
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||||
|
block_fail += 1;
|
||||||
|
uint64_t l = 0;
|
||||||
|
rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates,
|
||||||
|
Updates_index, breakdown, Slater_inv,
|
||||||
|
later_updates + (LDS * later),
|
||||||
|
later_index + later, &l, determinant);
|
||||||
|
later += l;
|
||||||
|
}
|
||||||
|
rc = qmckl_woodbury_2(LDS, Dim, &Updates[2*LDS], &Updates_index[2],
|
||||||
|
breakdown, Slater_inv, determinant);
|
||||||
|
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||||
|
block_fail += 1;
|
||||||
|
uint64_t l = 0;
|
||||||
|
rc = qmckl_slagel_splitting(LDS, Dim, 2, &Updates[2*LDS],
|
||||||
|
&Updates_index[2], breakdown, Slater_inv,
|
||||||
|
later_updates + (LDS * later),
|
||||||
|
later_index + later, &l, determinant);
|
||||||
|
later += l;
|
||||||
|
}
|
||||||
|
if (later > 0) {
|
||||||
|
recursive_calls++;
|
||||||
|
rc = qmckl_sherman_morrison_splitting(LDS, Dim, later, later_updates,
|
||||||
|
later_index, breakdown, Slater_inv,
|
||||||
|
determinant);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if (N_updates == 6) { // Special case for 6 rank-1 updates: 2+2+2
|
||||||
|
// rc = qmckl_woodbury_2(LDS, Dim, Updates, Updates_index,
|
||||||
|
// breakdown, Slater_inv, determinant);
|
||||||
|
// if (rc != 0) { // Send the entire block to slagel_splitting
|
||||||
|
// block_fail += 1;
|
||||||
|
// uint64_t l = 0;
|
||||||
|
// rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates,
|
||||||
|
// Updates_index, breakdown, Slater_inv,
|
||||||
|
// later_updates + (LDS * later),
|
||||||
|
// later_index + later, &l, determinant);
|
||||||
|
// later += l;
|
||||||
|
// }
|
||||||
|
// rc = qmckl_woodbury_2(LDS, Dim, &Updates[2*LDS], &Updates_index[2],
|
||||||
|
// breakdown, Slater_inv, determinant);
|
||||||
|
// if (rc != 0) { // Send the entire block to slagel_splitting
|
||||||
|
// block_fail += 1;
|
||||||
|
// uint64_t l = 0;
|
||||||
|
// rc = qmckl_slagel_splitting(LDS, Dim, 2, &Updates[2*LDS],
|
||||||
|
// &Updates_index[2], breakdown, Slater_inv,
|
||||||
|
// later_updates + (LDS * later),
|
||||||
|
// later_index + later, &l, determinant);
|
||||||
|
// later += l;
|
||||||
|
// }
|
||||||
|
// rc = qmckl_woodbury_2(LDS, Dim, &Updates[4*LDS], &Updates_index[4],
|
||||||
|
// breakdown, Slater_inv, determinant);
|
||||||
|
// if (rc != 0) { // Send the entire block to slagel_splitting
|
||||||
|
// block_fail += 1;
|
||||||
|
// uint64_t l = 0;
|
||||||
|
// rc = qmckl_slagel_splitting(LDS, Dim, 2, &Updates[4*LDS],
|
||||||
|
// &Updates_index[4], breakdown, Slater_inv,
|
||||||
|
// later_updates + (LDS * later),
|
||||||
|
// later_index + later, &l, determinant);
|
||||||
|
// later += l;
|
||||||
|
// }
|
||||||
|
// if (later > 0) {
|
||||||
|
// recursive_calls++;
|
||||||
|
// rc = qmckl_sherman_morrison_splitting(LDS, Dim, later, later_updates,
|
||||||
|
// later_index, breakdown, Slater_inv,
|
||||||
|
// determinant);
|
||||||
|
// }
|
||||||
|
// return 0;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// And for the other cases != 4, 6
|
||||||
|
// Apply first 3*n_of_3blocks updates in n_of_3blocks blocks of 3 updates with
|
||||||
|
// Woodbury 3x3 kernel
|
||||||
|
uint32_t n_of_3blocks = N_updates / 3;
|
||||||
|
uint32_t remainder = N_updates % 3;
|
||||||
|
uint32_t length_3block = 3 * LDS;
|
||||||
|
|
||||||
|
if (n_of_3blocks > 0) {
|
||||||
|
for (uint32_t i = 0; i < n_of_3blocks; i++) {
|
||||||
|
const double *Updates_3block = &Updates[i * length_3block];
|
||||||
|
const uint64_t *Updates_index_3block = &Updates_index[i * 3];
|
||||||
|
rc = qmckl_woodbury_3(LDS, Dim, Updates_3block, Updates_index_3block,
|
||||||
|
breakdown, Slater_inv, determinant);
|
||||||
|
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||||
|
// printf("QMCKL_WOODBURY_3 failed. Sending to QMCKL_SLAGEL_SPLITTING\n");
|
||||||
|
block_fail += 1;
|
||||||
|
uint64_t l = 0;
|
||||||
|
rc = qmckl_slagel_splitting(LDS, Dim, 3, Updates_3block,
|
||||||
|
Updates_index_3block, breakdown, Slater_inv,
|
||||||
|
later_updates + (LDS * later),
|
||||||
|
later_index + later, &l, determinant);
|
||||||
|
// if (rc != 0) printf("Something when catastrophically wrong in QMCKL_SLAGEL_SPLITTING\n");
|
||||||
|
later += l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply last remaining block of 2 updates with Woodbury 2x2 kernel
|
||||||
|
if (remainder == 2) {
|
||||||
|
const double *Updates_2block = &Updates[n_of_3blocks * length_3block];
|
||||||
|
const uint64_t *Updates_index_2block = &Updates_index[3 * n_of_3blocks];
|
||||||
|
rc = qmckl_woodbury_2(LDS, Dim, Updates_2block, Updates_index_2block,
|
||||||
|
breakdown, Slater_inv, determinant);
|
||||||
|
if (rc != 0) { // Send the entire block to slagel_splitting
|
||||||
|
// printf("QMCKL_WOODBURY_2 failed. Sending to QMCKL_SLAGEL_SPLITTING\n");
|
||||||
|
block_fail += 1;
|
||||||
|
uint64_t l = 0;
|
||||||
|
rc = qmckl_slagel_splitting(LDS, Dim, 2, Updates_2block,
|
||||||
|
Updates_index_2block, breakdown, Slater_inv,
|
||||||
|
later_updates + (LDS * later),
|
||||||
|
later_index + later, &l, determinant);
|
||||||
|
// if (rc != 0) printf("Something when catastrophically wrong in QMCKL_SLAGEL_SPLITTING\n");
|
||||||
|
later += l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply last remaining update with slagel_splitting
|
||||||
|
if (remainder == 1) {
|
||||||
|
// // printf("Sending single update to QMCKL_SLAGEL_SPLITTING\n");
|
||||||
|
const double *Updates_1block = &Updates[n_of_3blocks * length_3block];
|
||||||
|
const uint64_t *Updates_index_1block = &Updates_index[3 * n_of_3blocks];
|
||||||
|
uint64_t l = 0;
|
||||||
|
rc = qmckl_slagel_splitting(LDS, Dim, 1, Updates_1block,
|
||||||
|
Updates_index_1block, breakdown, Slater_inv,
|
||||||
|
later_updates + (LDS * later),
|
||||||
|
later_index + later, &l, determinant);
|
||||||
|
// if (rc != 0) printf("Something when catastrophically wrong in QMCKL_SLAGEL_SPLITTING\n");
|
||||||
|
later += l;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (later > 0) {
|
||||||
|
recursive_calls++;
|
||||||
|
// printf("Sending remaining updates to QMCKL_SHERMAN_MORRISON_SPLITTING\n");
|
||||||
|
rc = qmckl_sherman_morrison_splitting(LDS, Dim, later, later_updates,
|
||||||
|
later_index, breakdown, Slater_inv,
|
||||||
|
determinant);
|
||||||
|
// if (rc != 0) printf("Something when catastrophically wrong in QMCKL_SHERMAN_MORRISON_SPLITTING\n");
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sherman Morrison, leaving zero denominators for later
|
||||||
|
uint32_t qmckl_sherman_morrison_later(
|
||||||
|
const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
|
||||||
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
||||||
|
const uint64_t *__restrict Updates_index, const double breakdown,
|
||||||
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
||||||
|
double *__restrict determinant) {
|
||||||
|
|
||||||
|
const uint32_t Dim = 21;
|
||||||
|
const uint32_t LDS = 24;
|
||||||
|
|
||||||
|
double __attribute__((aligned(8))) C[Dim];
|
||||||
|
double __attribute__((aligned(8))) D[LDS];
|
||||||
|
|
||||||
|
double __attribute__((aligned(8))) later_updates[LDS * N_updates];
|
||||||
|
uint64_t later_index[N_updates];
|
||||||
|
uint64_t later = 0;
|
||||||
|
|
||||||
|
uint32_t l = 0;
|
||||||
|
// For each update
|
||||||
|
while (l < N_updates) {
|
||||||
|
|
||||||
|
// C = A^{-1} x U_l
|
||||||
|
for (uint32_t i = 0; i < Dim; i++) {
|
||||||
|
C[i] = 0.0;
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
for (uint32_t j = 0; j < LDS; j++) {
|
||||||
|
C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j]; // regular mat-vec product, but actually working on S_inv^T * U_l.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Denominator
|
||||||
|
const int cui = Updates_index[l] - 1;
|
||||||
|
double den = 1.0 + C[cui];
|
||||||
|
if (fabs(den) < breakdown) {
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
// for (uint32_t i = 0; i < Dim; i++) {
|
||||||
|
for (uint32_t i = 0; i < LDS; i++) {
|
||||||
|
later_updates[later * LDS + i] = Updates[l * LDS + i];
|
||||||
|
}
|
||||||
|
later_index[later] = Updates_index[l];
|
||||||
|
later++;
|
||||||
|
l += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
double iden = 1.0 / den;
|
||||||
|
|
||||||
|
if (!determinant) *determinant *= den;
|
||||||
|
|
||||||
|
// D = v^T x A^{-1}
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
for (uint32_t j = 0; j < LDS; j++) {
|
||||||
|
D[j] = Slater_inv[cui * LDS + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// S^{-1} = S^{-1} - C x D / den
|
||||||
|
for (uint32_t i = 0; i < Dim; i++) {
|
||||||
|
#pragma ivdep
|
||||||
|
#pragma vector aligned, novecremainder
|
||||||
|
for (uint32_t j = 0; j < LDS; j++) {
|
||||||
|
const double update = C[i] * D[j] * iden;
|
||||||
|
Slater_inv[i * LDS + j] -= update;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (later == N_updates) { // If all the updates have failed, exit early with an error
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
else if (later > 0) { // If some have failed, make a recursive call
|
||||||
|
recursive_calls++;
|
||||||
|
(void) qmckl_sherman_morrison_later(LDS, Dim, later, later_updates,
|
||||||
|
later_index, breakdown, Slater_inv, determinant);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inplace inverse n x n matrix A.
|
||||||
|
// returns:
|
||||||
|
// ret = 0 on success
|
||||||
|
// ret < 0 illegal argument value
|
||||||
|
// ret > 0 singular matrix
|
||||||
|
lapack_int inverse(double *a, uint64_t m, uint64_t n) {
|
||||||
|
int ipiv[m + 1];
|
||||||
|
lapack_int ret;
|
||||||
|
ret = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, m, n, a, n, ipiv);
|
||||||
|
if (ret != 0) return ret;
|
||||||
|
ret = LAPACKE_dgetri(LAPACK_ROW_MAJOR, n, a, n, ipiv);
|
||||||
|
return ret;
|
||||||
|
}
|
312
independent_test_harness/test.c
Normal file
312
independent_test_harness/test.c
Normal file
@ -0,0 +1,312 @@
|
|||||||
|
#include "meuk.h"
|
||||||
|
#include "cycles.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define DATASET "dataset_329d_zeropadded_cm.hdf5"
|
||||||
|
// #define DATASET "dataset_15784d_zeropadded_cm.hdf5"
|
||||||
|
#define REPETITIONS 100000
|
||||||
|
|
||||||
|
// Per-cycle event counters. main() zeroes all three before the repetitions
// loop, divides them by REPETITIONS afterwards and prints them in the
// SPLITS, BLK_FAILS and REC columns of the result table.

// Printed as SPLITS. NOTE(review): presumably incremented by the splitting
// kernel each time it splits an update -- confirm in the kernel sources.
uint64_t n_splits;
|
||||||
|
// Printed as BLK_FAILS. NOTE(review): presumably incremented by the blocked
// kernel when a block update fails -- confirm in the kernel sources.
uint64_t block_fail;
|
||||||
|
// Printed as REC. Incremented by the "later" Sherman-Morrison kernel each time
// it calls itself recursively on the updates it had to postpone.
uint64_t recursive_calls;
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
assert(argc == 2);
|
||||||
|
char *version = argv[1];
|
||||||
|
|
||||||
|
// SETUP STORAGE AND DATA ACCESS
|
||||||
|
hid_t file_id, dataset_id;
|
||||||
|
herr_t status;
|
||||||
|
file_id = H5Fopen(DATASET, H5F_ACC_RDONLY, H5P_DEFAULT);
|
||||||
|
char nupds_key[32];
|
||||||
|
char upd_idx_key[32];
|
||||||
|
char upds_key[32];
|
||||||
|
char slater_key[32];
|
||||||
|
char slater_inv_key[32];
|
||||||
|
char det_key[32];
|
||||||
|
const uint64_t Dim = 21;
|
||||||
|
const uint64_t LDS = 24;
|
||||||
|
uint64_t N_updates;
|
||||||
|
double Slater[LDS * Dim ], SlaterT[LDS * Dim];
|
||||||
|
double Slater_invT[LDS * Dim], Slater_invT_copy[LDS * Dim];
|
||||||
|
double determinant, determinant_copy;
|
||||||
|
|
||||||
|
// SETUP TEST PARAMETERS
|
||||||
|
const double breakdown = 0.001; // default = 0.001. 1e-9 might be too small
|
||||||
|
const double tolerance = 0.001; // default = 0.001
|
||||||
|
double cumulative = 0;
|
||||||
|
|
||||||
|
printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
||||||
|
printf("#1\t2\t3\t4\t\t5\t6\t\t7\t\t8\t\t9\t\t10\t\t11\t\t12\t\t13\t\t14\n");
|
||||||
|
printf("#CYCLE\tUPDS\tERR_IN\tERR_BREAK\tERR_OUT\tSPLITS\t\tBLK_FAILS\tMAX\t\tFROB\t\tCOND\t\tCPU_CYC\t\tCPU_CYC/UPD\tCUMUL\t\tREC\n");
|
||||||
|
printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
||||||
|
// FOR EACH UPDATE CYCLE DO:
|
||||||
|
for (uint32_t cycles_index = 0; cycles_index < n_cycles; cycles_index++) {
|
||||||
|
// for (uint32_t cycles_index = 0; cycles_index < 1; cycles_index++) {
|
||||||
|
// for (uint32_t cycles_index = 65; cycles_index < 66; cycles_index++) {
|
||||||
|
// for (uint32_t cycles_index = 8055; cycles_index < 8056; cycles_index++) {
|
||||||
|
// 1. READ DATA FROM DATASET
|
||||||
|
uint32_t cycle = cycles[cycles_index];
|
||||||
|
sprintf(nupds_key, "/cycle_%d/nupdates", cycle);
|
||||||
|
sprintf(upd_idx_key, "/cycle_%d/col_update_index", cycle);
|
||||||
|
sprintf(upds_key, "/cycle_%d/updates", cycle);
|
||||||
|
sprintf(slater_key, "/cycle_%d/slater_matrix", cycle);
|
||||||
|
sprintf(slater_inv_key, "/cycle_%d/slater_inverse_t", cycle);
|
||||||
|
sprintf(det_key, "/cycle_%d/determinant", cycle);
|
||||||
|
read_uint(file_id, nupds_key, &N_updates);
|
||||||
|
uint64_t *Updates_index = malloc(N_updates * sizeof(uint64_t));
|
||||||
|
double *Updates = malloc(LDS * N_updates * sizeof(double));
|
||||||
|
read_uint(file_id, upd_idx_key, Updates_index);
|
||||||
|
read_double(file_id, upds_key, Updates);
|
||||||
|
read_double(file_id, slater_key, Slater);
|
||||||
|
read_double(file_id, slater_inv_key, Slater_invT);
|
||||||
|
read_double(file_id, det_key, &determinant);
|
||||||
|
|
||||||
|
// Compute transpose of S. ST: 24 x 21
|
||||||
|
for (int i = 0; i < LDS; i++) {
|
||||||
|
for (int j = 0; j < Dim; j++) {
|
||||||
|
SlaterT[i * Dim + j] = Slater[j * LDS + i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert repl. upds into additive upds.
|
||||||
|
for (int i = 0; i < N_updates; i++) {
|
||||||
|
int col = Updates_index[i] - 1;
|
||||||
|
for (int j = 0; j < LDS; j++) {
|
||||||
|
Updates[i * LDS + j] -= SlaterT[col + j * Dim];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. CHECK ERROR ON THE INPUT DATA AND RECORD RESULT: ERR_INPUT
|
||||||
|
uint32_t err_inp = check_error(LDS, Dim, Slater_invT, SlaterT, tolerance);
|
||||||
|
|
||||||
|
// Update Slater matrix
|
||||||
|
for (int i = 0; i < N_updates; i++) {
|
||||||
|
int col = Updates_index[i] - 1;
|
||||||
|
for (int j = 0; j < Dim; j++) {
|
||||||
|
SlaterT[col + j * Dim] += Updates[i * LDS + j];
|
||||||
|
}
|
||||||
|
} // A this point SlaterT, Updates & the updated SlaterT are correct. Checked in GDB
|
||||||
|
|
||||||
|
int32_t err_break;
|
||||||
|
|
||||||
|
// 3. SET TIME- AND SPLIT ACCUMULATOR TO ZERO
|
||||||
|
double accumulator = 0;
|
||||||
|
double cycles_per_update = 0;
|
||||||
|
n_splits = 0;
|
||||||
|
block_fail = 0;
|
||||||
|
recursive_calls = 0;
|
||||||
|
|
||||||
|
// ## FOR A SET NUMBER OF REPETITIONS DO:
|
||||||
|
for (int rep = 0; rep < REPETITIONS; rep++) {
|
||||||
|
|
||||||
|
// 1. MAKE A FRESH COPY OF THE SLATER INVERSE AND DETERMINANT AND USE THE COPY
|
||||||
|
memcpy(Slater_invT_copy, Slater_invT, LDS * Dim * sizeof(double));
|
||||||
|
determinant_copy = determinant;
|
||||||
|
|
||||||
|
// ### CHOOSE A KERNEL:
|
||||||
|
if (version[0] == 'a') { // Anthony
|
||||||
|
const double *Upds;
|
||||||
|
const uint64_t *Ui;
|
||||||
|
double determinant_previous;
|
||||||
|
|
||||||
|
err_break = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < LDS * Dim; i++) Slater_invT_copy[i] *= determinant_copy; // Multiply inv(Slater-mat) by det(Slater-mat) to get adj(Slater_mat)
|
||||||
|
|
||||||
|
for (int i = 0; i < N_updates; i++) {
|
||||||
|
Upds = &Updates[i * LDS];
|
||||||
|
Ui = &Updates_index[i];
|
||||||
|
determinant_previous = determinant_copy;
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
detupd(Dim, LDS, Upds, Ui, Slater_invT_copy, &determinant_copy);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
|
||||||
|
// 5. STOP APPLYING UPDATES IF BREAKDOWN DETECTED
|
||||||
|
double lambda = determinant_copy / determinant_previous; // should be id. to lambda in detupd
|
||||||
|
if (fabs(lambda) < breakdown) {
|
||||||
|
err_break = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err_break == 1) { // Divide adj(Slater-mat) by OLD det(Slater-mat) to get inv(Slater_mat) again
|
||||||
|
for (int i = 0; i < LDS * Dim; i++) Slater_invT_copy[i] /= determinant_previous;
|
||||||
|
} else { // Divide adj(Slater-mat) by NEW det(Slater-mat) to get inv(Slater_mat) again
|
||||||
|
for (int i = 0; i < LDS * Dim; i++) Slater_invT_copy[i] /= determinant_copy;
|
||||||
|
}
|
||||||
|
} else if (version[0] == 'n') { // Naive
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
err_break = qmckl_sherman_morrison(LDS, Dim, N_updates, Updates,
|
||||||
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
} else if (version[0] == 'l') { // Later
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
err_break = qmckl_sherman_morrison_later(LDS, Dim, N_updates, Updates,
|
||||||
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
} else if (version[0] == '2') { // by twos
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
err_break = qmckl_woodbury_2(LDS, Dim, Updates, Updates_index,
|
||||||
|
breakdown, Slater_invT_copy, &determinant);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
|
||||||
|
} else if (version[0] == '3') { // by threes
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
err_break = qmckl_woodbury_3(LDS, Dim, Updates, Updates_index,
|
||||||
|
breakdown, Slater_invT_copy, &determinant);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
|
||||||
|
} else if (version[0] == 's') { // Splitting
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
err_break = qmckl_sherman_morrison_splitting(LDS, Dim, N_updates, Updates,
|
||||||
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
} else if (version[0] == 'b') { // Blocked
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
err_break = qmckl_sherman_morrison_smw32s(LDS, Dim, N_updates, Updates,
|
||||||
|
Updates_index, breakdown, Slater_invT_copy, &determinant);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
} else if (version[0] == 'm') { // LAPACK/MKL
|
||||||
|
|
||||||
|
// Only send upper Dim x Dim part of matrix to lapack
|
||||||
|
double tmp[Dim*Dim];
|
||||||
|
memcpy(tmp, SlaterT, Dim*Dim*sizeof(double));
|
||||||
|
|
||||||
|
// 1. FETCH START TIME
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
|
||||||
|
// 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
|
||||||
|
err_break = inverse(tmp, Dim, Dim);
|
||||||
|
|
||||||
|
// 3. FETCH FINISH TIME
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
|
||||||
|
// Copy elements of inverse back, adding 0-padding in "correct" place
|
||||||
|
for (uint32_t i = 0; i < Dim; i++) {
|
||||||
|
for (uint32_t j = 0; j < LDS; j++) {
|
||||||
|
if (j < Dim) Slater_invT_copy[i * LDS + j] = tmp[i * Dim + j];
|
||||||
|
else Slater_invT_copy[i * LDS + j] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
|
||||||
|
accumulator += (double)(after - before);
|
||||||
|
|
||||||
|
} else { // Exit
|
||||||
|
printf("Version '%c' not implemented.\n", version[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} // END OF REPETITIONS LOOP
|
||||||
|
|
||||||
|
// 4. COPY RESULT BACK TO ORIGINAL
|
||||||
|
memcpy(Slater_invT, Slater_invT_copy, LDS * Dim * sizeof(double));
|
||||||
|
determinant = determinant_copy;
|
||||||
|
// At this point Slater_invT contains the correct inverse matrix
|
||||||
|
|
||||||
|
// 5. DIVIDE CYCLE- AND SPLIT-ACCUMULATOR BY NUMBER OF REPETITIONS AND RECORD
|
||||||
|
// DIVIDE CYCLE-ACCUMULATOR BY NUMBER OF UPDATES AND RECORD
|
||||||
|
accumulator /= REPETITIONS;
|
||||||
|
cycles_per_update = accumulator / N_updates;
|
||||||
|
n_splits /= REPETITIONS;
|
||||||
|
block_fail /= REPETITIONS;
|
||||||
|
recursive_calls /= REPETITIONS;
|
||||||
|
|
||||||
|
// 6. ADD THE AVERAGED TIME PER CYCLE OF ACCUMULATER TO
|
||||||
|
// CUMULATIVE RESULT FOR THE ENTIRE DATASET
|
||||||
|
cumulative += accumulator;
|
||||||
|
|
||||||
|
double SSi[Dim * Dim];
|
||||||
|
matmul(SlaterT, Slater_invT, SSi, LDS, Dim);
|
||||||
|
double Res[Dim * Dim];
|
||||||
|
residual(SSi, Res, Dim);
|
||||||
|
const double max = max_norm(Res, Dim, Dim);
|
||||||
|
|
||||||
|
// 7. CHECK ERRROR ON THE UPDATED DATA AND RECORD THE RESULT: ERR_OUT
|
||||||
|
uint32_t err_out = check_error(LDS, Dim, Slater_invT, SlaterT, tolerance);
|
||||||
|
// int32_t err_out = check_error_better(max, tolerance);
|
||||||
|
|
||||||
|
// if (err_out == 1) printf("cycle index %d: cycle %d with %lu upds failed!\n", cycles_index, cycle, N_updates);
|
||||||
|
|
||||||
|
// 8. COMPUTE CONDITION NUMBER
|
||||||
|
const double condnr = condition_number(Slater, Slater_invT, LDS, Dim);
|
||||||
|
const double frob = frobenius_norm(Res, Dim, Dim);
|
||||||
|
|
||||||
|
|
||||||
|
// 10. WRITE RESULTS TO FILE: CYCLE#, #UPDS, ERR_INP, ERR_BREAK, #SPLITS, ERR_OUT, COND, #CLCK_TCKS
|
||||||
|
printf("%u\t%lu\t%u\t%u\t\t%u\t%lu\t\t%lu\t\t%e\t%e\t%e\t%e\t%e\t%e\t%lu\n", cycle, N_updates, err_inp, err_break, err_out, n_splits, block_fail, max, frob, condnr, accumulator, cycles_per_update, cumulative, recursive_calls);
|
||||||
|
|
||||||
|
free(Updates_index);
|
||||||
|
free(Updates);
|
||||||
|
|
||||||
|
} // END OF CYCLE LOOP
|
||||||
|
printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
||||||
|
printf("#1\t2\t3\t4\t\t5\t6\t\t7\t\t8\t\t9\t\t10\t\t11\t\t12\t\t13\t\t14\n");
|
||||||
|
printf("#CYCLE\tUPDS\tERR_IN\tERR_BREAK\tERR_OUT\tSPLITS\t\tBLK_FAILS\tMAX\t\tFROB\t\tCOND\t\tCPU_CYC\t\tCPU_CYC/UPD\tCUMUL\t\tREC\n");
|
||||||
|
printf("#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n");
|
||||||
|
|
||||||
|
(void) H5Fclose(file_id);
|
||||||
|
}
|
92
independent_test_harness/test.c.old
Normal file
92
independent_test_harness/test.c.old
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
#include <assert.h>
|
||||||
|
#include "data_cm.h"
|
||||||
|
#include "meuk.h"
|
||||||
|
|
||||||
|
#define REPETITIONS 10000000
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
|
assert(argc == 3);
|
||||||
|
char *version = argv[1];
|
||||||
|
char *number_of_updates = argv[2];
|
||||||
|
const uint64_t Dim = 21;
|
||||||
|
const uint64_t LDS = 24;
|
||||||
|
// const double breakdown = 1e-3;
|
||||||
|
const double breakdown = 1e-9; // this might be too small and cause NIs
|
||||||
|
uint32_t rc;
|
||||||
|
|
||||||
|
const uint64_t *N_updates;
|
||||||
|
const double *Updates;
|
||||||
|
const uint64_t *Updates_index;
|
||||||
|
double *Slater, *Slater_invT;
|
||||||
|
double determinant;
|
||||||
|
if (number_of_updates[0] == '2') { // 2 Updates
|
||||||
|
N_updates = &N_updates2;
|
||||||
|
Updates = &Updates2[0];
|
||||||
|
Updates_index = &Updates_index2[0];
|
||||||
|
Slater = &Slater2[0];
|
||||||
|
Slater_invT = &Slater_invT2[0]; // Slater_inv in QMC=Chem is actually its transpose
|
||||||
|
determinant = determinant2;
|
||||||
|
} else if (number_of_updates[0] == '3') { // 3 Updates
|
||||||
|
N_updates = &N_updates3;
|
||||||
|
Updates = &Updates3[0];
|
||||||
|
Updates_index = &Updates_index3[0];
|
||||||
|
Slater = &Slater3[0];
|
||||||
|
Slater_invT = &Slater_invT3[0];
|
||||||
|
determinant = determinant3;
|
||||||
|
} else if (number_of_updates[0] == '5') { // 5 Updates
|
||||||
|
N_updates = &N_updates5;
|
||||||
|
Updates = &Updates5[0];
|
||||||
|
Updates_index = &Updates_index5[0];
|
||||||
|
Slater = &Slater5[0];
|
||||||
|
Slater_invT = &Slater_invT5[0];
|
||||||
|
determinant = determinant5;
|
||||||
|
} else { // Exit
|
||||||
|
printf("Incorrect number of updates given\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = check_residual(LDS, Dim, Slater_invT, Slater);
|
||||||
|
assert(rc == 0 && "check_residual()");
|
||||||
|
rc = test_kernel(version, LDS, Dim, *N_updates, Updates, Updates_index,
|
||||||
|
breakdown, Slater, Slater_invT, &determinant);
|
||||||
|
assert(rc == 0 && "test_kernel()");
|
||||||
|
|
||||||
|
// EVERYTHING WORKS UP UNTILL HERE
|
||||||
|
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
if (version[0] == 'a') { // Anthony
|
||||||
|
for (int i = 0; i < REPETITIONS; i++) {
|
||||||
|
const double* Upds;
|
||||||
|
const uint64_t* Ui;
|
||||||
|
for (int j = 0; j < *N_updates; j++) {
|
||||||
|
Upds = &Updates[j*LDS];
|
||||||
|
Ui = &Updates_index[j];
|
||||||
|
detupd(Dim, LDS, Upds, Ui, Slater_invT, &determinant);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (version[0] == 'n') { // Naive
|
||||||
|
for (int i = 0; i < REPETITIONS; i++) {
|
||||||
|
rc = qmckl_sherman_morrison(LDS, Dim, *N_updates, Updates,
|
||||||
|
Updates_index, breakdown, Slater_invT, &determinant);
|
||||||
|
if (rc != 0) printf("qmckl_sherman_morrison failed\n");
|
||||||
|
}
|
||||||
|
} else if (version[0] == 's') { // Splitting
|
||||||
|
for (int i = 0; i < REPETITIONS; i++) {
|
||||||
|
rc = qmckl_sherman_morrison_splitting(LDS, Dim, *N_updates, Updates,
|
||||||
|
Updates_index, breakdown, Slater_invT, &determinant);
|
||||||
|
if (rc != 0) printf("qmckl_sherman_morrison_splitting failed\n");
|
||||||
|
}
|
||||||
|
} else if (version[0] == 'b') { // Blocked
|
||||||
|
for (int i = 0; i < REPETITIONS; i++) {
|
||||||
|
// rc = qmckl_woodbury_2(LDS, Dim, Updates, Updates_index,
|
||||||
|
// breakdown, Slater_inv, &determinant);
|
||||||
|
// rc = qmckl_woodbury_3(LDS, Dim, Updates, Updates_index,
|
||||||
|
// breakdown, Slater_inv, &determinant);
|
||||||
|
rc = qmckl_sherman_morrison_smw32s(LDS, Dim, *N_updates, Updates,
|
||||||
|
Updates_index, breakdown, Slater_invT, &determinant);
|
||||||
|
if (rc != 0) printf("qmckl_sherman_morrison_smw32s failed\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
printf("cycles = %f\n", ((double)(after - before) / (double) REPETITIONS));
|
||||||
|
}
|
62
independent_test_harness/test.c.older
Normal file
62
independent_test_harness/test.c.older
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#include <assert.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Read the processor's time-stamp counter with the x86 `rdtsc` instruction.
// The instruction delivers the low 32 bits in EAX and the high 32 bits in EDX;
// both halves are combined into a single 64-bit tick count.
static __inline__ uint64_t rdtsc(void) {
  unsigned high_word, low_word;
  __asm__ __volatile__("rdtsc" : "=a"(low_word), "=d"(high_word));
  const unsigned long long upper_half = ((unsigned long long)high_word) << 32;
  return upper_half | ((unsigned long long)low_word);
}
|
||||||
|
|
||||||
|
// Prototype only: no definition of this kernel is visible in this file, so it
// must come from another object file at link time.
// main() calls it REPETITIONS times when the selected version is not 'c',
// passing LDS = 24, Dim = 21, N_updates = 1 and ignoring the return value.
// Updates and Slater_inv are declared 8-byte aligned and, like the remaining
// pointers, __restrict (promised not to alias each other).
// NOTE(review): presumably applies N_updates Sherman-Morrison updates to
// Slater_inv in place, scales *determinant, and returns non-zero when an
// update falls below `breakdown` -- confirm against the kernel sources.
int qmckl_sherman_morrison(
|
||||||
|
const uint64_t LDS, const uint64_t Dim, const uint64_t N_updates,
|
||||||
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
||||||
|
const uint64_t *__restrict Updates_index, const double breakdown,
|
||||||
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
||||||
|
double *__restrict determinant);
|
||||||
|
|
||||||
|
// Prototype only: no definition of this kernel is visible in this file, so it
// must come from another object file at link time.
// Declared with the same parameter list as qmckl_sherman_morrison; main()
// calls it REPETITIONS times when the selected version is 'c' and ignores the
// return value.
// NOTE(review): the meaning of the int result is not visible from here --
// confirm against the kernel sources.
int detupd(const uint64_t LDS, const uint64_t Dim, const uint64_t N_updates,
|
||||||
|
const double *__restrict __attribute__((aligned(8))) Updates,
|
||||||
|
const uint64_t *__restrict Updates_index, const double breakdown,
|
||||||
|
double *__restrict __attribute__((aligned(8))) Slater_inv,
|
||||||
|
double *__restrict determinant);
|
||||||
|
|
||||||
|
#define REPETITIONS 100000000
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
|
assert(argc == 2);
|
||||||
|
char *version = argv[1];
|
||||||
|
|
||||||
|
const uint64_t Dim = 21;
|
||||||
|
const uint64_t LDS = 24;
|
||||||
|
const uint64_t N_updates = 1;
|
||||||
|
double Updates[LDS] __attribute__((aligned(8)));
|
||||||
|
uint64_t Updates_index[N_updates];
|
||||||
|
Updates_index[0] = 1;
|
||||||
|
const double breakdown = 1e-3;
|
||||||
|
double Slater_inv[LDS * Dim] __attribute__((aligned(8)));
|
||||||
|
double determinant = 1.0;
|
||||||
|
|
||||||
|
for (int i = 0; i < Dim; i++) {
|
||||||
|
Updates[i] = i;
|
||||||
|
for (int j = 0; j < Dim; j++) {
|
||||||
|
Slater_inv[LDS * i + j] = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t before = rdtsc();
|
||||||
|
if (version[0] == 'c') {
|
||||||
|
for (int i = 0; i < REPETITIONS; i++) {
|
||||||
|
detupd(LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv,
|
||||||
|
&determinant);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < REPETITIONS; i++) {
|
||||||
|
qmckl_sherman_morrison(LDS, Dim, N_updates, Updates, Updates_index,
|
||||||
|
breakdown, Slater_inv, &determinant);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint64_t after = rdtsc();
|
||||||
|
printf("cycles = %f\n", ((double)(after - before) / (double)REPETITIONS));
|
||||||
|
}
|
2
qmckl
2
qmckl
@ -1 +1 @@
|
|||||||
Subproject commit e180354cbc939b709bd7fe97a89953447284196c
|
Subproject commit ed953cf9b6e62b4cbd42f18f073805eb519e80f7
|
Loading…
Reference in New Issue
Block a user