Added an independent test harness, written in C. It has its own Makefile and datasets, and is completely independent of the main tree.

This commit is contained in:
Francois Coppens 2022-07-11 14:48:59 +02:00
parent 8bab304cb5
commit 732045284a
40 changed files with 2533 additions and 1 deletions

View File

@ -0,0 +1,43 @@
# Build file for the stand-alone kernel test harness.
#
# Default toolchain is Intel (ifort/icc). Alternative compiler and flag sets
# are kept below as commented-out lines; enable exactly one set at a time.

# FC = gfortran
# CC = gcc
# FFLAGS=-O0 -finline -g -lm -Wall -pedantic
# CFLAGS=-std=c99 -O0 -finline -g -lm -Wall -pedantic
FC = ifort
CC = icc
# FFLAGS=-O0 -warn all -g -pedantic
# CFLAGS=-std=c99 -O0 -Wall -g -pedantic
FFLAGS=-O3 -warn all -ip -finline -ftz -xCORE-AVX2 -g
CFLAGS=-std=c99 -O3 -Wall -ip -finline -ftz -xCORE-AVX2 -g
INCLUDE=-I/usr/include/hdf5/serial
# hdf5_hl is built on top of hdf5, so it has to come first on the link line.
LFLAGS=-L/usr/lib/x86_64-linux-gnu/hdf5/serial -lhdf5_hl -lhdf5 -qmkl=sequential
#FC = verificarlo-f
#CC = verificarlo-c
#FFLAGS=-O3 -finline -g
#CFLAGS=-O3 -finline -g

# Object files that make up the test executable.
OBJS = sm.o detupdate21.o test.o meuk.o

# Libraries go AFTER the objects: the linker resolves symbols left to right,
# so libraries listed first can be dropped before anything references them.

## Link with icc
test: $(OBJS)
	$(CC) -o test $(OBJS) $(LFLAGS)
## Link with ifort
# test: $(OBJS)
# 	$(FC) -nofor-main -o test $(OBJS) $(LFLAGS)
## Link with gfortran
# test: $(OBJS)
# 	$(FC) -Wno-main -o test $(OBJS) $(LFLAGS)

%.o: %.f90
	$(FC) $(FFLAGS) -c -o $@ $<

%.o : %.c
	$(CC) $(CFLAGS) $(INCLUDE) -c -o $@ $<

# These targets do not produce a file of the same name.
.PHONY: clean debug_n_2 debug_a_2

clean:
	rm -rf *.o *genmod* test

# Run the harness under gdb with the arguments "n 2" / "a 2".
debug_n_2: test
	gdb --silent --args ./test n 2

debug_a_2: test
	gdb --silent --args ./test a 2

View File

@ -0,0 +1,33 @@
Test method
===========
# > for each update cycle do:
(# of updates changes -> update indices & size of update-matrix changes)
1. read data from dataset
2. check error on the input data and record result: ERR_INPUT
3. set cycle- and split accumulator to zero
## >> for a set number of repetitions do:
1. take a fresh copy (memcpy) of the slater inverse and use it in chosen kernel
### >>> for the chosen kernel do:
1. fetch start cycles
2. execute kernel and remember exit status: ERR_BREAK
(number of splits is recorded in global variable)
3. fetch finish cycles
4. add cycle difference to time accumulator
## > continue: for each update cycle do
4. copy the updated slater-inverse-copy back to original
5a. divide cycle- and split-accumulator by number of repetitions
5b. divide cycle-accumulator by number of updates
6. add the averaged time per update cycle from the accumulator to the
cumulative result for the entire dataset
7. update the slater matrix
8. check the error on the updated data and record the result: ERR_OUT
9. write results to stdout: cycle#, #upds, err_inp, err_break,
#splits, err_out, #clck_tcks, #clck_tcks/upd, cumulative cycles

View File

@ -0,0 +1,27 @@
import numpy as np
import h5py
# Print, for every cycle stored in the dataset, the determinant of the Slater
# matrix, the condition numbers of its stored inverse, and the residual norms
# of (matrix * inverse - identity).

# Extent of the square block that is analysed.
# NOTE(review): presumably the datasets are zero-padded beyond this size (the
# file name says "zeropadded") -- confirm against the dataset layout.
N_SQUARE = 21

# The context manager guarantees the file is closed even if a cycle fails.
with h5py.File('dataset_zeropadded_cm.hdf5', 'r') as h5file:
    print("#cycle, det, cond_2, cond_f, norm_2, norm_f")
    print("# 1, 2, 3, 4, 5, 6")
    for key in h5file.keys():
        # Indexing (rather than .get) raises KeyError on a missing dataset
        # instead of silently handing None to numpy.
        cycle = h5file[key]
        slater_matrix = np.array(cycle['slater_matrix'])
        slater_inverse_t = np.array(cycle['slater_inverse_t'])
        # The inverse is stored transposed; undo that before use.
        slater_inverse = slater_inverse_t.transpose()

        # Crop both operands to the N_SQUARE x N_SQUARE block.
        slater_matrix_sq = slater_matrix[:, 0:N_SQUARE]
        slater_inverse_sq = slater_inverse[0:N_SQUARE, :]

        det = np.linalg.det(slater_matrix_sq)
        cond_2 = np.linalg.cond(slater_inverse_sq, p=2)
        cond_f = np.linalg.cond(slater_inverse_sq, p='fro')

        # Residual of the product with respect to the identity.
        Id_appr = np.matmul(slater_matrix_sq, slater_inverse_sq)
        Err = Id_appr - np.identity(Id_appr.shape[0])
        normf = np.linalg.norm(Err, ord='fro')
        norm2 = np.linalg.norm(Err, ord=2)

        # key[6:] drops the first six characters of the group name
        # (presumably the "cycle_" prefix).
        print(f"{key[6:]}, {det}, {cond_2}, {cond_f}, {norm2}, {normf}")

View File

@ -0,0 +1 @@
cycles_329_dets/all_cycles.h

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,2 @@
const uint32_t n_cycles = 286;
uint32_t cycles[n_cycles] = {26,27,28,29,31,32,76,301,325,355,356,357,358,360,361,405,630,654,684,685,686,687,689,690,734,959,983,1013,1014,1015,1016,1018,1019,1063,1288,1312,1342,1343,1344,1345,1347,1348,1392,1617,1641,1671,1672,1673,1674,1676,1677,1721,1946,1970,2000,2001,2002,2003,2005,2006,2050,2275,2299,2329,2330,2331,2332,2334,2335,2379,2604,2628,2658,2659,2660,2661,2663,2664,2708,2933,2957,2987,2988,2989,2990,2992,2993,3037,3262,3286,3316,3317,3318,3319,3321,3322,3366,3591,3615,3645,3646,3647,3648,3650,3651,3695,3920,3944,3974,3975,3976,3977,3979,3980,4024,4249,4273,4303,4304,4305,4306,4308,4309,4353,4578,4602,4632,4633,4634,4635,4637,4638,4682,4907,4931,4961,4962,4963,4964,4966,4967,5011,5236,5260,5290,5291,5292,5293,5295,5296,5340,5565,5589,5619,5620,5621,5622,5624,5625,5669,5894,5918,5948,5949,5950,5951,5953,5954,5998,6223,6247,6277,6278,6279,6280,6282,6283,6327,6552,6576,6606,6607,6608,6609,6611,6612,6656,6881,6905,6935,6936,6937,6938,6940,6941,6985,7210,7234,7264,7265,7266,7267,7269,7270,7314,7539,7563,7593,7594,7595,7596,7598,7599,7643,7868,7892,7922,7923,7924,7925,7927,7928,7972,8197,8221,8251,8252,8253,8254,8256,8257,8301,8526,8550,8580,8581,8582,8583,8585,8586,8630,8855,8879,8909,8910,8911,8912,8914,8915,8959,9184,9208,9238,9239,9240,9241,9243,9244,9288,9513,9537,9567,9568,9569,9570,9572,9573,9617,9842,9866,9896,9897,9898,9899,9901,9902,9946,10171,10195,10225,10226,10227,10228,10230,10231,10275};

View File

@ -0,0 +1,2 @@
const uint32_t n_cycles = 288;
uint32_t cycles[n_cycles] = {22,44,45,46,47,50,51,63,184,351,373,374,375,376,379,380,392,513,680,702,703,704,705,708,709,721,842,1009,1031,1032,1033,1034,1037,1038,1050,1171,1338,1360,1361,1362,1363,1366,1367,1379,1500,1667,1689,1690,1691,1692,1695,1696,1708,1829,1996,2018,2019,2020,2021,2024,2025,2037,2158,2325,2347,2348,2349,2350,2353,2354,2366,2487,2654,2676,2677,2678,2679,2682,2683,2695,2816,2983,3005,3006,3007,3008,3011,3012,3024,3145,3312,3334,3335,3336,3337,3340,3341,3353,3474,3641,3663,3664,3665,3666,3669,3670,3682,3803,3970,3992,3993,3994,3995,3998,3999,4011,4132,4299,4321,4322,4323,4324,4327,4328,4340,4461,4628,4650,4651,4652,4653,4656,4657,4669,4790,4957,4979,4980,4981,4982,4985,4986,4998,5119,5286,5308,5309,5310,5311,5314,5315,5327,5448,5615,5637,5638,5639,5640,5643,5644,5656,5777,5944,5966,5967,5968,5969,5972,5973,5985,6106,6273,6295,6296,6297,6298,6301,6302,6314,6435,6602,6624,6625,6626,6627,6630,6631,6643,6764,6931,6953,6954,6955,6956,6959,6960,6972,7093,7260,7282,7283,7284,7285,7288,7289,7301,7422,7589,7611,7612,7613,7614,7617,7618,7630,7751,7918,7940,7941,7942,7943,7946,7947,7959,8080,8247,8269,8270,8271,8272,8275,8276,8288,8409,8576,8598,8599,8600,8601,8604,8605,8617,8738,8905,8927,8928,8929,8930,8933,8934,8946,9067,9234,9256,9257,9258,9259,9262,9263,9275,9396,9563,9585,9586,9587,9588,9591,9592,9604,9725,9892,9914,9915,9916,9917,9920,9921,9933,10054,10221,10243,10244,10245,10246,10249,10250,10262,10383};

View File

@ -0,0 +1,2 @@
/* Generated list of cycle numbers; n_cycles is the number of entries. */
const uint32_t n_cycles = 64;
/* The extent is spelled as a literal on purpose: in C a const-qualified
 * variable is not a constant expression, so `cycles[n_cycles]` would declare
 * a variable-length array, which may neither be initialised nor appear at
 * file scope. Keep the literal equal to n_cycles. */
uint32_t cycles[64] = {41,170,370,499,699,828,1028,1157,1357,1486,1686,1815,2015,2144,2344,2473,2673,2802,3002,3131,3331,3460,3660,3789,3989,4118,4318,4447,4647,4776,4976,5105,5305,5434,5634,5763,5963,6092,6292,6421,6621,6750,6950,7079,7279,7408,7608,7737,7937,8066,8266,8395,8595,8724,8924,9053,9253,9382,9582,9711,9911,10040,10240,10369};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,2 @@
const uint32_t n_cycles = 978;
uint32_t cycles[n_cycles] = {14,60,65,67,80,81,89,92,139,153,158,172,173,178,179,180,215,223,238,263,268,277,281,284,287,293,298,306,307,318,319,343,389,394,396,409,410,418,421,468,482,487,501,502,507,508,509,544,552,567,592,597,606,610,613,616,622,627,635,636,647,648,672,718,723,725,738,739,747,750,797,811,816,830,831,836,837,838,873,881,896,921,926,935,939,942,945,951,956,964,965,976,977,1001,1047,1052,1054,1067,1068,1076,1079,1126,1140,1145,1159,1160,1165,1166,1167,1202,1210,1225,1250,1255,1264,1268,1271,1274,1280,1285,1293,1294,1305,1306,1330,1376,1381,1383,1396,1397,1405,1408,1455,1469,1474,1488,1489,1494,1495,1496,1531,1539,1554,1579,1584,1593,1597,1600,1603,1609,1614,1622,1623,1634,1635,1659,1705,1710,1712,1725,1726,1734,1737,1784,1798,1803,1817,1818,1823,1824,1825,1860,1868,1883,1908,1913,1922,1926,1929,1932,1938,1943,1951,1952,1963,1964,1988,2034,2039,2041,2054,2055,2063,2066,2113,2127,2132,2146,2147,2152,2153,2154,2189,2197,2212,2237,2242,2251,2255,2258,2261,2267,2272,2280,2281,2292,2293,2317,2363,2368,2370,2383,2384,2392,2395,2442,2456,2461,2475,2476,2481,2482,2483,2518,2526,2541,2566,2571,2580,2584,2587,2590,2596,2601,2609,2610,2621,2622,2646,2692,2697,2699,2712,2713,2721,2724,2771,2785,2790,2804,2805,2810,2811,2812,2847,2855,2870,2895,2900,2909,2913,2916,2919,2925,2930,2938,2939,2950,2951,2975,3021,3026,3028,3041,3042,3050,3053,3100,3114,3119,3133,3134,3139,3140,3141,3176,3184,3199,3224,3229,3238,3242,3245,3248,3254,3259,3267,3268,3279,3280,3304,3350,3355,3357,3370,3371,3379,3382,3429,3443,3448,3462,3463,3468,3469,3470,3505,3513,3528,3553,3558,3567,3571,3574,3577,3583,3588,3596,3597,3608,3609,3633,3679,3684,3686,3699,3700,3708,3711,3758,3772,3777,3791,3792,3797,3798,3799,3834,3842,3857,3882,3887,3896,3900,3903,3906,3912,3917,3925,3926,3937,3938,3962,4008,4013,4015,4028,4029,4037,4040,4087,4101,4106,4120,4121,4126,4127,4128,4163,4171,4186,4211,4216,4225,4229,4232,4235,4241,4246,4254,4255,4266,4267,4291,4337,4342,4344,4357,4358,4366,4369,4416,4430,4435,44
49,4450,4455,4456,4457,4492,4500,4515,4540,4545,4554,4558,4561,4564,4570,4575,4583,4584,4595,4596,4620,4666,4671,4673,4686,4687,4695,4698,4745,4759,4764,4778,4779,4784,4785,4786,4821,4829,4844,4869,4874,4883,4887,4890,4893,4899,4904,4912,4913,4924,4925,4949,4995,5000,5002,5015,5016,5024,5027,5074,5088,5093,5107,5108,5113,5114,5115,5150,5158,5173,5198,5203,5212,5216,5219,5222,5228,5233,5241,5242,5253,5254,5278,5324,5329,5331,5344,5345,5353,5356,5403,5417,5422,5436,5437,5442,5443,5444,5479,5487,5502,5527,5532,5541,5545,5548,5551,5557,5562,5570,5571,5582,5583,5607,5653,5658,5660,5673,5674,5682,5685,5732,5746,5751,5765,5766,5771,5772,5773,5808,5816,5831,5856,5861,5870,5874,5877,5880,5886,5891,5899,5900,5911,5912,5936,5982,5987,5989,6002,6003,6011,6014,6061,6075,6080,6094,6095,6100,6101,6102,6137,6145,6160,6185,6190,6199,6203,6206,6209,6215,6220,6228,6229,6240,6241,6265,6311,6316,6318,6331,6332,6340,6343,6390,6404,6409,6423,6424,6429,6430,6431,6466,6474,6489,6514,6519,6528,6532,6535,6538,6544,6549,6557,6558,6569,6570,6594,6640,6645,6647,6660,6661,6669,6672,6719,6733,6738,6752,6753,6758,6759,6760,6795,6803,6818,6843,6848,6857,6861,6864,6867,6873,6878,6886,6887,6898,6899,6923,6969,6974,6976,6989,6990,6998,7001,7048,7062,7067,7081,7082,7087,7088,7089,7124,7132,7147,7172,7177,7186,7190,7193,7196,7202,7207,7215,7216,7227,7228,7252,7298,7303,7305,7318,7319,7327,7330,7377,7391,7396,7410,7411,7416,7417,7418,7453,7461,7476,7501,7506,7515,7519,7522,7525,7531,7536,7544,7545,7556,7557,7581,7627,7632,7634,7647,7648,7656,7659,7706,7720,7725,7739,7740,7745,7746,7747,7782,7790,7805,7830,7835,7844,7848,7851,7854,7860,7865,7873,7874,7885,7886,7910,7956,7961,7963,7976,7977,7985,7988,8035,8049,8054,8068,8069,8074,8075,8076,8111,8119,8134,8159,8164,8173,8177,8180,8183,8189,8194,8202,8203,8214,8215,8239,8285,8290,8292,8305,8306,8314,8317,8364,8378,8383,8397,8398,8403,8404,8405,8440,8448,8463,8488,8493,8502,8506,8509,8512,8518,8523,8531,8532,8543,8544,8568,8614,8619,8621,8634,8635,8643,8646,86
93,8707,8712,8726,8727,8732,8733,8734,8769,8777,8792,8817,8822,8831,8835,8838,8841,8847,8852,8860,8861,8872,8873,8897,8943,8948,8950,8963,8964,8972,8975,9022,9036,9041,9055,9056,9061,9062,9063,9098,9106,9121,9146,9151,9160,9164,9167,9170,9176,9181,9189,9190,9201,9202,9226,9272,9277,9279,9292,9293,9301,9304,9351,9365,9370,9384,9385,9390,9391,9392,9427,9435,9450,9475,9480,9489,9493,9496,9499,9505,9510,9518,9519,9530,9531,9555,9601,9606,9608,9621,9622,9630,9633,9680,9694,9699,9713,9714,9719,9720,9721,9756,9764,9779,9804,9809,9818,9822,9825,9828,9834,9839,9847,9848,9859,9860,9884,9930,9935,9937,9950,9951,9959,9962,10009,10023,10028,10042,10043,10048,10049,10050,10085,10093,10108,10133,10138,10147,10151,10154,10157,10163,10168,10176,10177,10188,10189,10213,10259,10264,10266,10279,10280,10288,10291,10338,10352,10357,10371,10372,10377,10378,10379,10414};

View File

@ -0,0 +1,2 @@
const uint32_t n_cycles = 725;
uint32_t cycles[n_cycles] = {66,70,97,105,159,160,193,200,203,204,206,207,236,243,264,265,269,270,271,274,282,289,327,395,399,426,434,488,489,522,529,532,533,535,536,565,572,593,594,598,599,600,603,611,618,656,724,728,755,763,817,818,851,858,861,862,864,865,894,901,922,923,927,928,929,932,940,947,985,1053,1057,1084,1092,1146,1147,1180,1187,1190,1191,1193,1194,1223,1230,1251,1252,1256,1257,1258,1261,1269,1276,1314,1382,1386,1413,1421,1475,1476,1509,1516,1519,1520,1522,1523,1552,1559,1580,1581,1585,1586,1587,1590,1598,1605,1643,1711,1715,1742,1750,1804,1805,1838,1845,1848,1849,1851,1852,1881,1888,1909,1910,1914,1915,1916,1919,1927,1934,1972,2040,2044,2071,2079,2133,2134,2167,2174,2177,2178,2180,2181,2210,2217,2238,2239,2243,2244,2245,2248,2256,2263,2301,2369,2373,2400,2408,2462,2463,2496,2503,2506,2507,2509,2510,2539,2546,2567,2568,2572,2573,2574,2577,2585,2592,2630,2698,2702,2729,2737,2791,2792,2825,2832,2835,2836,2838,2839,2868,2875,2896,2897,2901,2902,2903,2906,2914,2921,2959,3027,3031,3058,3066,3120,3121,3154,3161,3164,3165,3167,3168,3197,3204,3225,3226,3230,3231,3232,3235,3243,3250,3288,3356,3360,3387,3395,3449,3450,3483,3490,3493,3494,3496,3497,3526,3533,3554,3555,3559,3560,3561,3564,3572,3579,3617,3685,3689,3716,3724,3778,3779,3812,3819,3822,3823,3825,3826,3855,3862,3883,3884,3888,3889,3890,3893,3901,3908,3946,4014,4018,4045,4053,4107,4108,4141,4148,4151,4152,4154,4155,4184,4191,4212,4213,4217,4218,4219,4222,4230,4237,4275,4343,4347,4374,4382,4436,4437,4470,4477,4480,4481,4483,4484,4513,4520,4541,4542,4546,4547,4548,4551,4559,4566,4604,4672,4676,4703,4711,4765,4766,4799,4806,4809,4810,4812,4813,4842,4849,4870,4871,4875,4876,4877,4880,4888,4895,4933,5001,5005,5032,5040,5094,5095,5128,5135,5138,5139,5141,5142,5171,5178,5199,5200,5204,5205,5206,5209,5217,5224,5262,5330,5334,5361,5369,5423,5424,5457,5464,5467,5468,5470,5471,5500,5507,5528,5529,5533,5534,5535,5538,5546,5553,5591,5659,5663,5690,5698,5752,5753,5786,5793,5796,5797,5799,5800,5829,5836,5857,5858,5862,586
3,5864,5867,5875,5882,5920,5988,5992,6019,6027,6081,6082,6115,6122,6125,6126,6128,6129,6158,6165,6186,6187,6191,6192,6193,6196,6204,6211,6249,6317,6321,6348,6356,6410,6411,6444,6451,6454,6455,6457,6458,6487,6494,6515,6516,6520,6521,6522,6525,6533,6540,6578,6646,6650,6677,6685,6739,6740,6773,6780,6783,6784,6786,6787,6816,6823,6844,6845,6849,6850,6851,6854,6862,6869,6907,6975,6979,7006,7014,7068,7069,7102,7109,7112,7113,7115,7116,7145,7152,7173,7174,7178,7179,7180,7183,7191,7198,7236,7304,7308,7335,7343,7397,7398,7431,7438,7441,7442,7444,7445,7474,7481,7502,7503,7507,7508,7509,7512,7520,7527,7565,7633,7637,7664,7672,7726,7727,7760,7767,7770,7771,7773,7774,7803,7810,7831,7832,7836,7837,7838,7841,7849,7856,7894,7962,7966,7993,8001,8055,8056,8089,8096,8099,8100,8102,8103,8132,8139,8160,8161,8165,8166,8167,8170,8178,8185,8223,8291,8295,8322,8330,8384,8385,8418,8425,8428,8429,8431,8432,8461,8468,8489,8490,8494,8495,8496,8499,8507,8514,8552,8620,8624,8651,8659,8713,8714,8747,8754,8757,8758,8760,8761,8790,8797,8818,8819,8823,8824,8825,8828,8836,8843,8881,8949,8953,8980,8988,9042,9043,9076,9083,9086,9087,9089,9090,9119,9126,9147,9148,9152,9153,9154,9157,9165,9172,9210,9278,9282,9309,9317,9371,9372,9405,9412,9415,9416,9418,9419,9448,9455,9476,9477,9481,9482,9483,9486,9494,9501,9539,9607,9611,9638,9646,9700,9701,9734,9741,9744,9745,9747,9748,9777,9784,9805,9806,9810,9811,9812,9815,9823,9830,9868,9936,9940,9967,9975,10029,10030,10063,10070,10073,10074,10076,10077,10106,10113,10134,10135,10139,10140,10141,10144,10152,10159,10197,10265,10269,10296,10304,10358,10359,10392,10399,10402,10403,10405,10406};

View File

@ -0,0 +1,2 @@
const uint32_t n_cycles = 884;
uint32_t cycles[n_cycles] = {95,122,132,134,136,152,154,155,164,166,167,169,190,192,194,202,219,220,234,237,244,251,266,276,290,291,292,310,424,451,461,463,465,481,483,484,493,495,496,498,519,521,523,531,548,549,563,566,573,580,595,605,619,620,621,639,753,780,790,792,794,810,812,813,822,824,825,827,848,850,852,860,877,878,892,895,902,909,924,934,948,949,950,968,1082,1109,1119,1121,1123,1139,1141,1142,1151,1153,1154,1156,1177,1179,1181,1189,1206,1207,1221,1224,1231,1238,1253,1263,1277,1278,1279,1297,1411,1438,1448,1450,1452,1468,1470,1471,1480,1482,1483,1485,1506,1508,1510,1518,1535,1536,1550,1553,1560,1567,1582,1592,1606,1607,1608,1626,1740,1767,1777,1779,1781,1797,1799,1800,1809,1811,1812,1814,1835,1837,1839,1847,1864,1865,1879,1882,1889,1896,1911,1921,1935,1936,1937,1955,2069,2096,2106,2108,2110,2126,2128,2129,2138,2140,2141,2143,2164,2166,2168,2176,2193,2194,2208,2211,2218,2225,2240,2250,2264,2265,2266,2284,2398,2425,2435,2437,2439,2455,2457,2458,2467,2469,2470,2472,2493,2495,2497,2505,2522,2523,2537,2540,2547,2554,2569,2579,2593,2594,2595,2613,2727,2754,2764,2766,2768,2784,2786,2787,2796,2798,2799,2801,2822,2824,2826,2834,2851,2852,2866,2869,2876,2883,2898,2908,2922,2923,2924,2942,3056,3083,3093,3095,3097,3113,3115,3116,3125,3127,3128,3130,3151,3153,3155,3163,3180,3181,3195,3198,3205,3212,3227,3237,3251,3252,3253,3271,3385,3412,3422,3424,3426,3442,3444,3445,3454,3456,3457,3459,3480,3482,3484,3492,3509,3510,3524,3527,3534,3541,3556,3566,3580,3581,3582,3600,3714,3741,3751,3753,3755,3771,3773,3774,3783,3785,3786,3788,3809,3811,3813,3821,3838,3839,3853,3856,3863,3870,3885,3895,3909,3910,3911,3929,4043,4070,4080,4082,4084,4100,4102,4103,4112,4114,4115,4117,4138,4140,4142,4150,4167,4168,4182,4185,4192,4199,4214,4224,4238,4239,4240,4258,4372,4399,4409,4411,4413,4429,4431,4432,4441,4443,4444,4446,4467,4469,4471,4479,4496,4497,4511,4514,4521,4528,4543,4553,4567,4568,4569,4587,4701,4728,4738,4740,4742,4758,4760,4761,4770,4772,4773,4775,4796,4798,4800,4808,4825,4826,4840,4
843,4850,4857,4872,4882,4896,4897,4898,4916,5030,5057,5067,5069,5071,5087,5089,5090,5099,5101,5102,5104,5125,5127,5129,5137,5154,5155,5169,5172,5179,5186,5201,5211,5225,5226,5227,5245,5359,5386,5396,5398,5400,5416,5418,5419,5428,5430,5431,5433,5454,5456,5458,5466,5483,5484,5498,5501,5508,5515,5530,5540,5554,5555,5556,5574,5688,5715,5725,5727,5729,5745,5747,5748,5757,5759,5760,5762,5783,5785,5787,5795,5812,5813,5827,5830,5837,5844,5859,5869,5883,5884,5885,5903,6017,6044,6054,6056,6058,6074,6076,6077,6086,6088,6089,6091,6112,6114,6116,6124,6141,6142,6156,6159,6166,6173,6188,6198,6212,6213,6214,6232,6346,6373,6383,6385,6387,6403,6405,6406,6415,6417,6418,6420,6441,6443,6445,6453,6470,6471,6485,6488,6495,6502,6517,6527,6541,6542,6543,6561,6675,6702,6712,6714,6716,6732,6734,6735,6744,6746,6747,6749,6770,6772,6774,6782,6799,6800,6814,6817,6824,6831,6846,6856,6870,6871,6872,6890,7004,7031,7041,7043,7045,7061,7063,7064,7073,7075,7076,7078,7099,7101,7103,7111,7128,7129,7143,7146,7153,7160,7175,7185,7199,7200,7201,7219,7333,7360,7370,7372,7374,7390,7392,7393,7402,7404,7405,7407,7428,7430,7432,7440,7457,7458,7472,7475,7482,7489,7504,7514,7528,7529,7530,7548,7662,7689,7699,7701,7703,7719,7721,7722,7731,7733,7734,7736,7757,7759,7761,7769,7786,7787,7801,7804,7811,7818,7833,7843,7857,7858,7859,7877,7991,8018,8028,8030,8032,8048,8050,8051,8060,8062,8063,8065,8086,8088,8090,8098,8115,8116,8130,8133,8140,8147,8162,8172,8186,8187,8188,8206,8320,8347,8357,8359,8361,8377,8379,8380,8389,8391,8392,8394,8415,8417,8419,8427,8444,8445,8459,8462,8469,8476,8491,8501,8515,8516,8517,8535,8649,8676,8686,8688,8690,8706,8708,8709,8718,8720,8721,8723,8744,8746,8748,8756,8773,8774,8788,8791,8798,8805,8820,8830,8844,8845,8846,8864,8978,9005,9015,9017,9019,9035,9037,9038,9047,9049,9050,9052,9073,9075,9077,9085,9102,9103,9117,9120,9127,9134,9149,9159,9173,9174,9175,9193,9307,9334,9344,9346,9348,9364,9366,9367,9376,9378,9379,9381,9402,9404,9406,9414,9431,9432,9446,9449,9456,9463,9478,9488,9502,9503,9504,9
522,9636,9663,9673,9675,9677,9693,9695,9696,9705,9707,9708,9710,9731,9733,9735,9743,9760,9761,9775,9778,9785,9792,9807,9817,9831,9832,9833,9851,9965,9992,10002,10004,10006,10022,10024,10025,10034,10036,10037,10039,10060,10062,10064,10072,10089,10090,10104,10107,10114,10121,10136,10146,10160,10161,10162,10180,10294,10321,10331,10333,10335,10351,10353,10354,10363,10365,10366,10368,10389,10391,10393,10401};

View File

@ -0,0 +1,2 @@
const uint32_t n_cycles = 346;
uint32_t cycles[n_cycles] = {62,102,131,137,216,217,233,235,322,323,324,391,431,460,466,545,546,562,564,651,652,653,720,760,789,795,874,875,891,893,980,981,982,1049,1089,1118,1124,1203,1204,1220,1222,1309,1310,1311,1378,1418,1447,1453,1532,1533,1549,1551,1638,1639,1640,1707,1747,1776,1782,1861,1862,1878,1880,1967,1968,1969,2036,2076,2105,2111,2190,2191,2207,2209,2296,2297,2298,2365,2405,2434,2440,2519,2520,2536,2538,2625,2626,2627,2694,2734,2763,2769,2848,2849,2865,2867,2954,2955,2956,3023,3063,3092,3098,3177,3178,3194,3196,3283,3284,3285,3352,3392,3421,3427,3506,3507,3523,3525,3612,3613,3614,3681,3721,3750,3756,3835,3836,3852,3854,3941,3942,3943,4010,4050,4079,4085,4164,4165,4181,4183,4270,4271,4272,4339,4379,4408,4414,4493,4494,4510,4512,4599,4600,4601,4668,4708,4737,4743,4822,4823,4839,4841,4928,4929,4930,4997,5037,5066,5072,5151,5152,5168,5170,5257,5258,5259,5326,5366,5395,5401,5480,5481,5497,5499,5586,5587,5588,5655,5695,5724,5730,5809,5810,5826,5828,5915,5916,5917,5984,6024,6053,6059,6138,6139,6155,6157,6244,6245,6246,6313,6353,6382,6388,6467,6468,6484,6486,6573,6574,6575,6642,6682,6711,6717,6796,6797,6813,6815,6902,6903,6904,6971,7011,7040,7046,7125,7126,7142,7144,7231,7232,7233,7300,7340,7369,7375,7454,7455,7471,7473,7560,7561,7562,7629,7669,7698,7704,7783,7784,7800,7802,7889,7890,7891,7958,7998,8027,8033,8112,8113,8129,8131,8218,8219,8220,8287,8327,8356,8362,8441,8442,8458,8460,8547,8548,8549,8616,8656,8685,8691,8770,8771,8787,8789,8876,8877,8878,8945,8985,9014,9020,9099,9100,9116,9118,9205,9206,9207,9274,9314,9343,9349,9428,9429,9445,9447,9534,9535,9536,9603,9643,9672,9678,9757,9758,9774,9776,9863,9864,9865,9932,9972,10001,10007,10086,10087,10103,10105,10192,10193,10194,10261,10301,10330,10336,10415};

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
Empty placeholder. Please download real dataset from: trex@trex-share.univ-tlse3.fr:uvsq/datasets/dataset_15784d_zeropadded_cm.hdf5

View File

@ -0,0 +1 @@
Empty placeholder. Please download real dataset from: trex@trex-share.univ-tlse3.fr:uvsq/datasets/dataset_329d_zeropadded_cm.hdf5

View File

@ -0,0 +1,108 @@
! C-callable entry point (bind(C)) that forwards a single update to
! det_update21.
!
!   vDim, vLDS     passed by value; vLDS is the leading dimension of
!                  Slater_inv and vDim its second extent
!   Updates        update vector (vLDS elements)
!   Updates_index  only element 1 is read; it is handed to det_update21 as
!                  the column index l
!   Slater_inv     matrix updated in place by det_update21
!   determinant    scalar updated in place by det_update21
subroutine detupd(vDim, vLDS, Updates, Updates_index, &
                  Slater_inv, determinant) bind(C)
  use iso_c_binding
  implicit none
  external :: det_update21

  integer(c_int64_t), intent(in), value :: vLDS, vDim
  real(c_double),     intent(in)        :: Updates(vLDS)
  integer(c_int64_t), intent(in)        :: Updates_index(1)
  real(c_double),     intent(inout)     :: Slater_inv(vLDS,vDim)
  real(c_double),     intent(inout)     :: determinant

  ! Local copies of the scalar arguments that are handed on to the kernel.
  integer(c_int64_t) :: col, dim_arg, lds_arg

  col     = Updates_index(1)
  lds_arg = vLDS
  dim_arg = vDim

  call det_update21(dim_arg, lds_arg, Updates, col, Slater_inv, determinant)
end subroutine detupd
! In-place rank-one update of the leading 21x21 block of S_inv and of the
! scalar d, driven by the vector u and the column index l.
!
!   n, LDS  S_inv is declared S_inv(LDS,n). The extent 21 is hard-coded in
!           every loop and u, z and w are declared with n elements, so the
!           routine only makes sense for n >= 21 and LDS >= 21.
!   u       update vector; elements 1..21 are read
!   l       column of S_inv used for the first dot product and for w
!   S_inv   overwritten with S_inv*lambda - w*z^T (see below)
!   d       overwritten with d + dot(S_inv(1:21,l), u(1:21))
!
! If |lambda| = |d_new/d_old| is below 1.d-3 the routine returns early: d has
! already been updated at that point but S_inv is left untouched.
!
! The compiler directives state that u and S_inv are 32-byte aligned and that
! LDS is a multiple of 4; callers have to honour this.
! NOTE(review): the arithmetic has the shape of a Sherman-Morrison style
! update -- confirm the exact scaling convention of S_inv against det.irp.f.
subroutine det_update21(n, LDS, u, l, S_inv, d) ! det.irp.f_template_577: 427
use iso_c_binding
implicit none ! det.irp.f_template_577: 428
integer(c_int64_t), intent(in) :: l ! det.irp.f_template_577: 430
integer(c_int64_t), intent(in) :: n,LDS ! det.irp.f_template_577: 430
real(c_double),intent(inout) :: S_inv(LDS,n) ! det.irp.f_template_577: 435
real(c_double),intent(inout) :: d ! det.irp.f_template_577: 436
real(c_double), intent(in) :: u(n)
real(c_double) :: z(n), w(n), lambda, d_inv ! det.irp.f_template_577: 438
integer(c_int64_t) :: i,j ! det.irp.f_template_577: 444
real(c_double) :: zj, zj1, zj2, zj3 ! det.irp.f_template_577: 445
!DIR$ ATTRIBUTES ALIGN : 32 :: z, w ! det.irp.f_template_577: 439
!DIR$ ASSUME_ALIGNED u : 32 ! det.irp.f_template_577: 440
!DIR$ ASSUME_ALIGNED S_inv : 32 ! det.irp.f_template_577: 441
!DIR$ ASSUME (mod(LDS,32/8) == 0) ! det.irp.f_template_577: 442
!DIR$ ASSUME (LDS >= 21) ! det.irp.f_template_577: 443
! Step 1: zj = dot(S_inv(1:21,l), u(1:21)); elements 1..20 are handled four
! at a time, element 21 is added separately after the loop.
zj = 0.d0 !! dot prod col S_inv and update: vT*S_inv*u ! det.irp.f_template_577: 451
!DIR$ NOPREFETCH ! det.irp.f_template_577: 452
do i=1,21-1,4 ! det.irp.f_template_577: 453
zj = zj + S_inv(i,l)*u(i) + S_inv(i+1,l)*u(i+1) &
+ S_inv(i+2,l)*u(i+2) + S_inv(i+3,l)*u(i+3) ! det.irp.f_template_577: 454
enddo ! det.irp.f_template_577: 456
zj = zj + S_inv(21,l)*u(21) ! det.irp.f_template_577: 457
! Step 2: update d and form lambda = d_new / d_old.
d_inv = 1.d0/d ! reciprocal of old det ! det.irp.f_template_577: 459
d = d+zj ! det.irp.f_template_577: 460
lambda = d*d_inv ! det.irp.f_template_577: 461
! Bail out when the ratio is too small; d keeps its new value, S_inv is not
! modified.
if (dabs(lambda) < 1.d-3) then ! det.irp.f_template_577: 462
! d = 0.d0 ! det.irp.f_template_577: 463
return ! det.irp.f_template_577: 464
endif ! det.irp.f_template_577: 465
! Step 3: z(j) = dot(S_inv(1:21,j), u(1:21)) for j = 1..20, four columns per
! outer iteration; row 21 is added after the inner loop.
!DIR$ VECTOR ALIGNED ! det.irp.f_template_577: 467
do j=1,21-1,4 ! det.irp.f_template_577: 468
zj = 0.d0 ! det.irp.f_template_577: 469
zj1 = 0.d0 ! det.irp.f_template_577: 470
zj2 = 0.d0 ! det.irp.f_template_577: 471
zj3 = 0.d0 ! det.irp.f_template_577: 472
!DIR$ VECTOR ALIGNED ! det.irp.f_template_577: 473
!DIR$ NOPREFETCH ! det.irp.f_template_577: 474
do i=1,21-1 ! det.irp.f_template_577: 475
zj = zj + S_inv(i,j )*u(i) ! det.irp.f_template_577: 476
zj1 = zj1 + S_inv(i,j+1)*u(i) ! det.irp.f_template_577: 477
zj2 = zj2 + S_inv(i,j+2)*u(i) ! det.irp.f_template_577: 478
zj3 = zj3 + S_inv(i,j+3)*u(i) ! det.irp.f_template_577: 479
enddo ! det.irp.f_template_577: 480
z(j ) = zj + S_inv(21,j )*u(21) ! det.irp.f_template_577: 481
z(j+1) = zj1 + S_inv(21,j+1)*u(21) ! det.irp.f_template_577: 482
z(j+2) = zj2 + S_inv(21,j+2)*u(21) ! det.irp.f_template_577: 483
z(j+3) = zj3 + S_inv(21,j+3)*u(21) ! det.irp.f_template_577: 484
enddo ! det.irp.f_template_577: 485
! Remaining column 21 of step 3.
zj = 0.d0 ! det.irp.f_template_577: 487
!DIR$ VECTOR ALIGNED ! det.irp.f_template_577: 488
!DIR$ NOPREFETCH ! det.irp.f_template_577: 489
do i=1,21-1 ! det.irp.f_template_577: 490
zj = zj + S_inv(i,21)*u(i) ! det.irp.f_template_577: 491
enddo ! det.irp.f_template_577: 492
z(21) = zj + S_inv(21,21)*u(21) ! det.irp.f_template_577: 493
! Step 4: w = column l of S_inv scaled by 1/d_old. It is saved before the
! update below overwrites column l.
!DIR$ NOPREFETCH ! det.irp.f_template_577: 495
do i=1,21 ! det.irp.f_template_577: 496
w(i) = S_inv(i,l)*d_inv ! det.irp.f_template_577: 497
enddo ! det.irp.f_template_577: 499
! Step 5: S_inv(j,i) = S_inv(j,i)*lambda - w(j)*z(i) for columns 1..20, four
! columns per outer iteration; row 21 is handled after the inner loop.
do i=1,21-1,4 ! det.irp.f_template_577: 501
zj = z(i ) ! det.irp.f_template_577: 502
zj1 = z(i+1) ! det.irp.f_template_577: 503
zj2 = z(i+2) ! det.irp.f_template_577: 504
zj3 = z(i+3) ! det.irp.f_template_577: 505
!DIR$ VECTOR ALIGNED ! det.irp.f_template_577: 506
!DIR$ NOPREFETCH ! det.irp.f_template_577: 507
do j=1,21-1 ! det.irp.f_template_577: 508
S_inv(j,i ) = S_inv(j,i )*lambda - w(j)*zj ! det.irp.f_template_577: 509
S_inv(j,i+1) = S_inv(j,i+1)*lambda - w(j)*zj1 ! det.irp.f_template_577: 510
S_inv(j,i+2) = S_inv(j,i+2)*lambda - w(j)*zj2 ! det.irp.f_template_577: 511
S_inv(j,i+3) = S_inv(j,i+3)*lambda - w(j)*zj3 ! det.irp.f_template_577: 512
enddo ! det.irp.f_template_577: 513
S_inv(21,i ) = S_inv(21,i )*lambda - w(21)*zj ! det.irp.f_template_577: 514
S_inv(21,i+1) = S_inv(21,i+1)*lambda - w(21)*zj1 ! det.irp.f_template_577: 515
S_inv(21,i+2) = S_inv(21,i+2)*lambda - w(21)*zj2 ! det.irp.f_template_577: 516
S_inv(21,i+3) = S_inv(21,i+3)*lambda - w(21)*zj3 ! det.irp.f_template_577: 517
enddo ! det.irp.f_template_577: 518
! Remaining column 21 of step 5.
zj = z(21) ! det.irp.f_template_577: 520
!DIR$ VECTOR ALIGNED ! det.irp.f_template_577: 521
!DIR$ NOPREFETCH ! det.irp.f_template_577: 522
do i=1,21 ! det.irp.f_template_577: 523
S_inv(i,21) = S_inv(i,21)*lambda -w(i)*zj ! det.irp.f_template_577: 524
enddo ! det.irp.f_template_577: 525
end ! det.irp.f_template_577: 528

View File

@ -0,0 +1,28 @@
#!/bin/bash
# Generate the C header <NU>_cycles.h listing every cycle of an HDF5 dataset
# whose "nupdates" value equals NU.
#
# Usage: <this script> <dataset.hdf5> <number_of_updates>
#
# The header defines
#   const uint32_t n_cycles = <count>;
#   uint32_t cycles[<count>] = {<cycle numbers, ascending>};

IN=$1 # Input dataset (hdf5)
NU=$2 # Number of updates

if [[ -z "${IN}" || -z "${NU}" ]]
then
    echo "usage: $0 <dataset.hdf5> <number_of_updates>" >&2
    exit 1
fi

OUT="${NU}_cycles.h"

# Top-level groups are named cycle_<number>; keep the numbers, sorted.
CYCLE_LIST=$(h5ls "${IN}" | awk '{print $1}' | sed 's/cycle_//g' | sort -n)
SELECTION=()

# Filter CYCLE_LIST and add to SELECTION
for CYCLE in ${CYCLE_LIST}
do
    # The value is read from the second field of the third line printed by
    # `h5ls -d`.
    NUPDS=$(h5ls -d "${IN}/cycle_${CYCLE}/nupdates" | awk 'FNR == 3 {print $2}')
    if (( NUPDS == NU ))
    then
        SELECTION+=("${CYCLE}")
    fi
done

NELEMENTS=${#SELECTION[@]}

# An empty selection cannot be expressed as a valid C array initialiser, so
# refuse to write a header instead of emitting a broken one.
if (( NELEMENTS == 0 ))
then
    echo "error: no cycle with ${NU} update(s) found in ${IN}" >&2
    exit 1
fi

# Join the selected cycle numbers with commas (no trailing comma to trim).
VALUES=$(IFS=,; echo "${SELECTION[*]}")

# Generate C-header file.
# The array extent is written as a literal: `cycles[n_cycles]` would be a
# variable-length array in C, which cannot carry an initialiser.
{
    echo "const uint32_t n_cycles = ${NELEMENTS};"
    echo "uint32_t cycles[${NELEMENTS}] = {${VALUES}};"
} > "${OUT}"

View File

@ -0,0 +1,806 @@
#! /bin/octave -qf
% Read the result tables, one file per variant. Each table is used below as a
% numeric matrix whose columns 2 and 5 are inspected.
data_anthony = load('ANTHONY.dat');
data_naive   = load('NAIVE.dat');
data_later   = load('LATER.dat');
data_split   = load('SPLITTING.dat');
data_blocked = load('BLOCKED.dat');
data_lapack  = load('MKL_LAPACK.dat');
data_wb2     = load('WB2.dat');
data_wb3     = load('WB3.dat');
% ---- ANTHONY: split the result table and export every subset ----
% Rows with column 5 == 0 are the cycles that passed, all other rows failed.
indcs=(data_anthony(:,5)==0);
anthony_pass_all=data_anthony(indcs,:);
anthony_fail_all=data_anthony(!indcs,:);
dlmwrite('anthony_pass_all.dat',anthony_pass_all, ' ')
dlmwrite('anthony_fail_all.dat',anthony_fail_all, ' ')
% Split both tables further on column 2 (number of updates, 1..15).
% The variables anthony_pass_<k> / anthony_fail_<k> are referenced by name
% further down, so they are created with eval instead of being kept in a
% container.
for nupd_=1:15
  indcs=(anthony_pass_all(:,2)==nupd_); % passed cycles containing nupd_ upd
  subset_=anthony_pass_all(indcs,:);
  eval(sprintf('anthony_pass_%d=subset_;',nupd_));
  dlmwrite(sprintf('anthony_pass_%d.dat',nupd_),subset_, ' ')
  indcs=(anthony_fail_all(:,2)==nupd_); % failed cycles containing nupd_ upd
  subset_=anthony_fail_all(indcs,:);
  eval(sprintf('anthony_fail_%d=subset_;',nupd_));
  dlmwrite(sprintf('anthony_fail_%d.dat',nupd_),subset_, ' ')
end
clear nupd_ subset_ % do not leave loop helpers in the workspace
% ---- NAIVE: split the result table and export every subset ----
% Rows with column 5 == 0 are the cycles that passed, all other rows failed.
indcs=(data_naive(:,5)==0);
naive_pass_all=data_naive(indcs,:);
naive_fail_all=data_naive(!indcs,:);
dlmwrite('naive_pass_all.dat',naive_pass_all, ' ')
dlmwrite('naive_fail_all.dat',naive_fail_all, ' ')
% Split both tables further on column 2 (number of updates, 1..15).
% The variables naive_pass_<k> / naive_fail_<k> are referenced by name
% further down, so they are created with eval instead of being kept in a
% container.
for nupd_=1:15
  indcs=(naive_pass_all(:,2)==nupd_); % passed cycles containing nupd_ upd
  subset_=naive_pass_all(indcs,:);
  eval(sprintf('naive_pass_%d=subset_;',nupd_));
  dlmwrite(sprintf('naive_pass_%d.dat',nupd_),subset_, ' ')
  indcs=(naive_fail_all(:,2)==nupd_); % failed cycles containing nupd_ upd
  subset_=naive_fail_all(indcs,:);
  eval(sprintf('naive_fail_%d=subset_;',nupd_));
  dlmwrite(sprintf('naive_fail_%d.dat',nupd_),subset_, ' ')
end
clear nupd_ subset_ % do not leave loop helpers in the workspace
% ---- LATER: split the result table and export every subset ----
% Rows with column 5 == 0 are the cycles that passed, all other rows failed.
indcs=(data_later(:,5)==0);
later_pass_all=data_later(indcs,:);
later_fail_all=data_later(!indcs,:);
dlmwrite('later_pass_all.dat',later_pass_all, ' ')
dlmwrite('later_fail_all.dat',later_fail_all, ' ')
% Split both tables further on column 2 (number of updates, 1..15).
% The variables later_pass_<k> / later_fail_<k> are referenced by name
% further down, so they are created with eval instead of being kept in a
% container.
for nupd_=1:15
  indcs=(later_pass_all(:,2)==nupd_); % passed cycles containing nupd_ upd
  subset_=later_pass_all(indcs,:);
  eval(sprintf('later_pass_%d=subset_;',nupd_));
  dlmwrite(sprintf('later_pass_%d.dat',nupd_),subset_, ' ')
  indcs=(later_fail_all(:,2)==nupd_); % failed cycles containing nupd_ upd
  subset_=later_fail_all(indcs,:);
  eval(sprintf('later_fail_%d=subset_;',nupd_));
  dlmwrite(sprintf('later_fail_%d.dat',nupd_),subset_, ' ')
end
clear nupd_ subset_ % do not leave loop helpers in the workspace
% ---- SPLITTING: split the result table and export every subset ----
% Rows with column 5 == 0 are the cycles that passed, all other rows failed.
indcs=(data_split(:,5)==0);
split_pass_all=data_split(indcs,:);
split_fail_all=data_split(!indcs,:);
dlmwrite('split_pass_all.dat',split_pass_all, ' ')
dlmwrite('split_fail_all.dat',split_fail_all, ' ')
% Split both tables further on column 2 (number of updates, 1..15).
% The variables split_pass_<k> / split_fail_<k> are referenced by name
% further down, so they are created with eval instead of being kept in a
% container.
for nupd_=1:15
  indcs=(split_pass_all(:,2)==nupd_); % passed cycles containing nupd_ upd
  subset_=split_pass_all(indcs,:);
  eval(sprintf('split_pass_%d=subset_;',nupd_));
  dlmwrite(sprintf('split_pass_%d.dat',nupd_),subset_, ' ')
  indcs=(split_fail_all(:,2)==nupd_); % failed cycles containing nupd_ upd
  subset_=split_fail_all(indcs,:);
  eval(sprintf('split_fail_%d=subset_;',nupd_));
  dlmwrite(sprintf('split_fail_%d.dat',nupd_),subset_, ' ')
end
clear nupd_ subset_ % do not leave loop helpers in the workspace
% ---- BLOCKED: split the result table and export every subset ----
% Rows with column 5 == 0 are the cycles that passed, all other rows failed.
indcs=(data_blocked(:,5)==0);
blocked_pass_all=data_blocked(indcs,:);
blocked_fail_all=data_blocked(!indcs,:);
dlmwrite('blocked_pass_all.dat',blocked_pass_all, ' ')
dlmwrite('blocked_fail_all.dat',blocked_fail_all, ' ')
% Split both tables further on column 2 (number of updates, 1..15).
% The variables blocked_pass_<k> / blocked_fail_<k> are referenced by name
% further down, so they are created with eval instead of being kept in a
% container.
for nupd_=1:15
  indcs=(blocked_pass_all(:,2)==nupd_); % passed cycles containing nupd_ upd
  subset_=blocked_pass_all(indcs,:);
  eval(sprintf('blocked_pass_%d=subset_;',nupd_));
  dlmwrite(sprintf('blocked_pass_%d.dat',nupd_),subset_, ' ')
  indcs=(blocked_fail_all(:,2)==nupd_); % failed cycles containing nupd_ upd
  subset_=blocked_fail_all(indcs,:);
  eval(sprintf('blocked_fail_%d=subset_;',nupd_));
  dlmwrite(sprintf('blocked_fail_%d.dat',nupd_),subset_, ' ')
end
clear nupd_ subset_ % do not leave loop helpers in the workspace
% ---- MKL_LAPACK: split the result table and export every subset ----
% Rows with column 5 == 0 are the cycles that passed, all other rows failed.
indcs=(data_lapack(:,5)==0);
lapack_pass_all=data_lapack(indcs,:);
lapack_fail_all=data_lapack(!indcs,:);
dlmwrite('lapack_pass_all.dat',lapack_pass_all, ' ')
dlmwrite('lapack_fail_all.dat',lapack_fail_all, ' ')
% Split both tables further on column 2 (number of updates, 1..15).
% The variables lapack_pass_<k> / lapack_fail_<k> are referenced by name
% further down, so they are created with eval instead of being kept in a
% container.
for nupd_=1:15
  indcs=(lapack_pass_all(:,2)==nupd_); % passed cycles containing nupd_ upd
  subset_=lapack_pass_all(indcs,:);
  eval(sprintf('lapack_pass_%d=subset_;',nupd_));
  dlmwrite(sprintf('lapack_pass_%d.dat',nupd_),subset_, ' ')
  indcs=(lapack_fail_all(:,2)==nupd_); % failed cycles containing nupd_ upd
  subset_=lapack_fail_all(indcs,:);
  eval(sprintf('lapack_fail_%d=subset_;',nupd_));
  dlmwrite(sprintf('lapack_fail_%d.dat',nupd_),subset_, ' ')
end
clear nupd_ subset_ % do not leave loop helpers in the workspace
indcs=(data_wb2(:,5)==0); % select cycles that passed
wb2_pass_all=data_wb2(indcs,:);
dlmwrite('wb2_pass_all.dat',wb2_pass_all, ' ')
indcs=(data_wb2(:,5)!=0); % select cycles that failed
wb2_fail_all=data_wb2(indcs,:);
dlmwrite('wb2_fail_all.dat',wb2_fail_all, ' ')
indcs=(data_wb3(:,5)==0); % select cycles that passed
wb3_pass_all=data_wb3(indcs,:);
dlmwrite('wb3_pass_all.dat',wb3_pass_all, ' ')
indcs=(data_wb3(:,5)!=0); % select cycles that failed
wb3_fail_all=data_wb3(indcs,:);
dlmwrite('wb3_fail_all.dat',wb3_fail_all, ' ')
n_all_cycles=size(data_anthony)(1);
n_1_cycles=size(anthony_pass_1)(1)+size(anthony_fail_1)(1);
n_2_cycles=size(anthony_pass_2)(1)+size(anthony_fail_2)(1);
n_3_cycles=size(anthony_pass_3)(1)+size(anthony_fail_3)(1);
n_6_cycles=size(anthony_pass_6)(1)+size(anthony_fail_6)(1);
fail_rate_all_anthony=sum(anthony_fail_all(:,5))/n_all_cycles;
fail_rate_all_naive=sum(naive_fail_all(:,5))/n_all_cycles;
fail_rate_all_later=sum(later_fail_all(:,5))/n_all_cycles;
fail_rate_all_split=sum(split_fail_all(:,5))/n_all_cycles;
fail_rate_all_blocked=sum(blocked_fail_all(:,5))/n_all_cycles;
fail_rate_all_lapack=sum(lapack_fail_all(:,5))/n_all_cycles;
fail_rate_1_anthony=sum(anthony_fail_1(:,5))/n_1_cycles;
fail_rate_1_naive=sum(naive_fail_1(:,5))/n_1_cycles;
fail_rate_1_later=sum(later_fail_1(:,5))/n_1_cycles;
fail_rate_1_split=sum(split_fail_1(:,5))/n_1_cycles;
fail_rate_1_blocked=sum(blocked_fail_1(:,5))/n_1_cycles;
fail_rate_1_lapack=sum(lapack_fail_1(:,5))/n_1_cycles;
fail_rate_2_anthony=sum(anthony_fail_2(:,5))/n_2_cycles;
fail_rate_2_naive=sum(naive_fail_2(:,5))/n_2_cycles;
fail_rate_2_later=sum(later_fail_2(:,5))/n_2_cycles;
fail_rate_2_split=sum(split_fail_2(:,5))/n_2_cycles;
fail_rate_2_blocked=sum(blocked_fail_2(:,5))/n_2_cycles;
fail_rate_2_lapack=sum(lapack_fail_2(:,5))/n_2_cycles;
fail_rate_wb2=sum(data_wb2(:,5))/n_2_cycles;
fail_rate_3_anthony=sum(anthony_fail_3(:,5))/n_3_cycles;
fail_rate_3_naive=sum(naive_fail_3(:,5))/n_3_cycles;
fail_rate_3_later=sum(later_fail_3(:,5))/n_3_cycles;
fail_rate_3_split=sum(split_fail_3(:,5))/n_3_cycles;
fail_rate_3_blocked=sum(blocked_fail_3(:,5))/n_3_cycles;
fail_rate_3_lapack=sum(lapack_fail_3(:,5))/n_3_cycles;
fail_rate_wb3=sum(data_wb3(:,5))/n_3_cycles;
fail_rate_6_anthony=sum(anthony_fail_6(:,5))/n_6_cycles;
fail_rate_6_naive=sum(naive_fail_6(:,5))/n_6_cycles;
fail_rate_6_later=sum(later_fail_6(:,5))/n_6_cycles;
fail_rate_6_split=sum(split_fail_6(:,5))/n_6_cycles;
fail_rate_6_lapack=sum(lapack_fail_6(:,5))/n_6_cycles;
fail_rate_6_blocked=sum(blocked_fail_6(:,5))/n_6_cycles;
printf("\n");
printf ("Fail rates for all (N=%d) cycles\n", n_all_cycles);
printf ("-------------------------------------------------------------------------------------------\n");
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_anthony*100, fail_rate_all_anthony, size(anthony_pass_all)(1), size(anthony_fail_all)(1), size(anthony_pass_all)(1)+size(anthony_fail_all)(1));
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_naive*100, fail_rate_all_naive, size(naive_pass_all)(1), size(naive_fail_all)(1), size(naive_pass_all)(1)+size(naive_fail_all)(1));
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_later*100, fail_rate_all_later, size(later_pass_all)(1), size(later_fail_all)(1), size(later_pass_all)(1)+size(later_fail_all)(1));
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_split*100, fail_rate_all_split, size(split_pass_all)(1), size(split_fail_all)(1), size(split_pass_all)(1)+size(split_fail_all)(1));
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_blocked*100, fail_rate_all_blocked, size(blocked_pass_all)(1), size(blocked_fail_all)(1), size(blocked_pass_all)(1)+size(blocked_fail_all)(1));
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_all_lapack*100, fail_rate_all_lapack, size(lapack_pass_all)(1), size(lapack_fail_all)(1), size(lapack_pass_all)(1)+size(lapack_fail_all)(1));
printf("\n");
printf ("Fail rates for cycles containing 1 update (N=%d) (solely due to numerical noise)\n", n_1_cycles);
printf ("-------------------------------------------------------------------------------------------\n");
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_anthony*100, fail_rate_1_anthony, size(anthony_pass_1)(1), size(anthony_fail_1)(1), n_1_cycles);
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_naive*100, fail_rate_1_naive, size(naive_pass_1)(1), size(naive_fail_1)(1), size(naive_pass_1)(1)+size(naive_fail_1)(1));
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_later*100, fail_rate_1_later, size(later_pass_1)(1), size(later_fail_1)(1), size(later_pass_1)(1)+size(later_fail_1)(1));
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_split*100, fail_rate_1_split, size(split_pass_1)(1), size(split_fail_1)(1), size(split_pass_1)(1)+size(split_fail_1)(1));
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_blocked*100, fail_rate_1_blocked, size(blocked_pass_1)(1), size(blocked_fail_1)(1), size(blocked_pass_1)(1)+size(blocked_fail_1)(1));
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_1_lapack*100, fail_rate_1_lapack, size(lapack_pass_1)(1), size(lapack_fail_1)(1), size(lapack_pass_1)(1)+size(lapack_fail_1)(1));
printf("\n");
printf ("Fail rates for cycles containing 2 updates (N=%d) (compare blocked w/ WB2)\n", n_2_cycles);
printf ("-------------------------------------------------------------------------------------------\n");
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_anthony*100, fail_rate_2_anthony, size(anthony_pass_2)(1), size(anthony_fail_2)(1), n_2_cycles);
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_naive*100, fail_rate_2_naive, size(naive_pass_2)(1), size(naive_fail_2)(1), size(naive_pass_2)(1)+size(naive_fail_2)(1));
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_later*100, fail_rate_2_later, size(later_pass_2)(1), size(later_fail_2)(1), size(later_pass_2)(1)+size(later_fail_2)(1));
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_split*100, fail_rate_2_split, size(split_pass_2)(1), size(split_fail_2)(1), size(split_pass_2)(1)+size(split_fail_2)(1));
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_blocked*100, fail_rate_2_blocked, size(blocked_pass_2)(1), size(blocked_fail_2)(1), size(blocked_pass_2)(1)+size(blocked_fail_2)(1));
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_2_lapack*100, fail_rate_2_lapack, size(lapack_pass_2)(1), size(lapack_fail_2)(1), size(lapack_pass_2)(1)+size(lapack_fail_2)(1));
printf ("Woodbury 2:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_wb2*100, fail_rate_wb2, size(wb2_pass_all)(1), size(wb2_fail_all)(1), size(wb2_pass_all)(1)+size(wb2_fail_all)(1));
printf("\n");
printf ("Fail rates for cycles containing 3 updates (N=%d) (compare blocked w/ WB3)\n", n_3_cycles);
printf ("-------------------------------------------------------------------------------------------\n");
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_anthony*100, fail_rate_3_anthony, size(anthony_pass_3)(1), size(anthony_fail_3)(1), n_3_cycles);
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_naive*100, fail_rate_3_naive, size(naive_pass_3)(1), size(naive_fail_3)(1), size(naive_pass_3)(1)+size(naive_fail_3)(1));
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_later*100, fail_rate_3_later, size(later_pass_3)(1), size(later_fail_3)(1), size(later_pass_3)(1)+size(later_fail_3)(1));
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_split*100, fail_rate_3_split, size(split_pass_3)(1), size(split_fail_3)(1), size(split_pass_3)(1)+size(split_fail_3)(1));
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_blocked*100, fail_rate_3_blocked, size(blocked_pass_3)(1), size(blocked_fail_3)(1), size(blocked_pass_3)(1)+size(blocked_fail_3)(1));
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_3_lapack*100, fail_rate_3_lapack, size(lapack_pass_3)(1), size(lapack_fail_3)(1), size(lapack_pass_3)(1)+size(lapack_fail_3)(1));
printf ("Woodbury 3:\t%f (= %f x N cycles; %d pass + %d fail = %d tot.)\n", fail_rate_wb3*100, fail_rate_wb3, size(wb3_pass_all)(1), size(wb3_fail_all)(1), size(wb3_pass_all)(1)+size(wb3_fail_all)(1));
printf("\n");
printf ("Fail rates for cycles containing 6 updates (N=%d) (blocked vs splitting in multiples of 3)\n", n_6_cycles);
printf ("-------------------------------------------------------------------------------------------\n");
printf ("Anthony:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_anthony*100, fail_rate_6_anthony, size(anthony_pass_6)(1), size(anthony_fail_6)(1), n_6_cycles);
printf ("Naive:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_naive*100, fail_rate_6_naive, size(naive_pass_6)(1), size(naive_fail_6)(1), size(naive_pass_6)(1)+size(naive_fail_6)(1));
printf ("Later:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_later*100, fail_rate_6_later, size(later_pass_6)(1), size(later_fail_6)(1), size(later_pass_6)(1)+size(later_fail_6)(1));
printf ("Splitting:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_split*100, fail_rate_6_split, size(split_pass_6)(1), size(split_fail_6)(1), size(split_pass_6)(1)+size(split_fail_6)(1));
printf ("Blocked:\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_blocked*100, fail_rate_6_blocked, size(blocked_pass_6)(1), size(blocked_fail_6)(1), size(blocked_pass_6)(1)+size(blocked_fail_6)(1));
printf ("Lapack:\t\t%f (= %f x N cycles; %d pass + %d fail = %d tot)\n", fail_rate_6_lapack*100, fail_rate_6_lapack, size(lapack_pass_6)(1), size(lapack_fail_6)(1), size(lapack_pass_6)(1)+size(lapack_fail_6)(1));

View File

@ -0,0 +1,46 @@
#! /bin/octave -qf
% Summarise timing and failure statistics of four kernels (anthony, naive,
% splitting, blocked) for one update count.
%
% Arguments: <directory with the data files> <number of updates>.
% The four input files are <dir>/<nr>_<kernel>.dat.
%
% Statements below deliberately lack a trailing semicolon: Octave then echoes
% "name = value", which is this script's only output.
if (nargin < 2)
printf ("Call with: ./arguments.m ⟨path/to/data/files⟩ ⟨nr of updates⟩\n");
return;
endif
arg_list = argv ();
INDIR = arg_list{1};
INNR = arg_list{2};
% Build the per-kernel file names from directory and update count.
FILE_ANTHONY = [INDIR "/" INNR "_anthony.dat"];
FILE_NAIVE = [INDIR "/" INNR "_naive.dat"];
FILE_SPLIT = [INDIR "/" INNR "_splitting.dat"];
FILE_BLOCK = [INDIR "/" INNR "_blocked.dat"];
data_antho=dlmread(FILE_ANTHONY);
data_naive = dlmread (FILE_NAIVE);
data_split = dlmread (FILE_SPLIT);
data_block = dlmread (FILE_BLOCK);
printf ("\n");
% Two rows are not counted as cycles; the statistics below use rows
% 2 .. n_cycles+1, i.e. they skip the first and the last row.
% NOTE(review): presumably a header and a footer line -- confirm against the
% program that writes these files.
% The cycle count is taken from the anthony file and reused for all kernels,
% so all four files are assumed to have the same number of rows.
n_cycles = size(data_antho)(1)-2
printf ("\n");
% Mean of column 10 per kernel.
% NOTE(review): column 10 is presumably CPU cycles per update (from the
% variable names) -- confirm against the data file layout.
average_cpucycls_p_upd_antho = mean(data_antho(2:n_cycles+1,10))
average_cpucycls_p_upd_naive = mean(data_naive(2:n_cycles+1,10))
average_cpucycls_p_upd_split = mean(data_split(2:n_cycles+1,10))
average_cpucycls_p_upd_block = mean(data_block(2:n_cycles+1,10))
printf ("\n");
% Standard deviation of the same column.
std_cpucycls_p_upd_antho = std(data_antho(2:n_cycles+1,10))
std_cpucycls_p_upd_naive = std(data_naive(2:n_cycles+1,10))
std_cpucycls_p_upd_split = std(data_split(2:n_cycles+1,10))
std_cpucycls_p_upd_block = std(data_block(2:n_cycles+1,10))
printf ("\n");
% Cost of each kernel relative to the anthony kernel (ratio of the means).
factor_naive = average_cpucycls_p_upd_naive / average_cpucycls_p_upd_antho
factor_split = average_cpucycls_p_upd_split / average_cpucycls_p_upd_antho
factor_block = average_cpucycls_p_upd_block / average_cpucycls_p_upd_antho
printf ("\n");
% Sum of column 5 divided by the number of cycles.
% NOTE(review): this is a fail rate only if column 5 is a 0/1 failure flag --
% confirm.
fail_rate_antho = sum( data_antho(2:n_cycles+1, 5) ) / n_cycles
fail_rate_naive = sum( data_naive(2:n_cycles+1, 5) ) / n_cycles
fail_rate_split = sum( data_split(2:n_cycles+1, 5) ) / n_cycles
fail_rate_block = sum( data_block(2:n_cycles+1, 5) ) / n_cycles

View File

@ -0,0 +1,56 @@
#include <mkl_lapacke.h>
/*
 * Prototypes of the update kernels exercised by the test harness.
 *
 * Common parameters of the qmckl_* kernels:
 *   vLDS, vDim     leading dimension and dimension of the matrices
 *   N_updates      number of rank-1 updates in Updates
 *   Updates        the updates, one per row of length LDS
 *   Updates_index  1-based index of the row/column replaced by each update
 *   breakdown      threshold below which a denominator/determinant is
 *                  considered too small
 *   Slater_inv     inverse Slater matrix, updated in place
 *   determinant    determinant accumulator, updated in place
 * The kernels return 0 on success and non-zero on failure.
 *
 * NOTE(review): this header uses uint64_t/uint32_t but does not include
 * <stdint.h> itself; it relies on the including file having done so.
 */

/* NOTE(review): definition not visible here; presumably an in-place matrix
 * inversion through LAPACK (MKL) returning the LAPACK info code -- confirm. */
lapack_int inverse(double *A, uint64_t Dim, uint64_t LDS);
/* Naive Sherman-Morrison: applies the updates one by one and gives up
 * (returns 1) as soon as one denominator is below breakdown. */
uint32_t qmckl_sherman_morrison(
const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
const double *__restrict __attribute__((aligned(8))) Updates,
const uint64_t *__restrict Updates_index, const double breakdown,
double *__restrict __attribute__((aligned(8))) Slater_inv,
double *__restrict determinant);
/* Sherman-Morrison with update splitting: an update whose denominator is
 * below breakdown is halved and the second half is applied in a later pass. */
uint32_t qmckl_sherman_morrison_splitting(
const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
const double *__restrict __attribute__((aligned(8))) Updates,
const uint64_t *__restrict Updates_index, const double breakdown,
double *__restrict __attribute__((aligned(8))) Slater_inv,
double *__restrict determinant);
/* "Blocked" kernel (as it is called in test_kernel); it dispatches blocks of
 * updates to the Woodbury kernels. */
uint32_t qmckl_sherman_morrison_smw32s(
const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
const double *__restrict __attribute__((aligned(8))) Updates,
const uint64_t *__restrict Updates_index, const double breakdown,
double *__restrict __attribute__((aligned(8))) Slater_inv,
double *__restrict determinant);
/* Woodbury kernel applying exactly three updates at once. */
uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
const double *__restrict __attribute__((aligned(8)))
Updates,
const uint64_t *__restrict Updates_index,
const double breakdown,
double *__restrict __attribute__((aligned(8)))
Slater_inv,
double *__restrict determinant);
/* Woodbury kernel applying exactly two updates at once. */
uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
const double *__restrict __attribute__((aligned(8)))
Updates,
const uint64_t *__restrict Updates_index,
const double breakdown,
double *__restrict __attribute__((aligned(8)))
Slater_inv,
double *__restrict determinant);
/* Reference kernel used by the 'a' (Anthony) variant of test_kernel; it is
 * called once per update. Note the argument order: Dim comes before LDS.
 * NOTE(review): definition not visible here -- confirm the exact contract. */
void detupd(const uint64_t Dim, const uint64_t LDS,
const double *__restrict __attribute__((aligned(8))) Updates,
const uint64_t *__restrict Updates_index,
double *__restrict __attribute__((aligned(8))) Slater_inv,
double *__restrict determinant);
/* NOTE(review): definition not visible here; presumably Sherman-Morrison
 * that postpones problematic updates to a later pass -- confirm. */
uint32_t qmckl_sherman_morrison_later(
const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
const double *__restrict __attribute__((aligned(8))) Updates,
const uint64_t *__restrict Updates_index, const double breakdown,
double *__restrict __attribute__((aligned(8))) Slater_inv,
double *__restrict determinant);

View File

@ -0,0 +1,168 @@
#include "meuk.h"
#include <stdint.h>
#include <assert.h>
/*
 * Print every stored element of A (LDS * Dim values, padding included),
 * one value per line in "%f" format, followed by an empty line.
 */
void print_matrix(double *A, const uint64_t LDS, const uint64_t Dim) {
  const uint64_t n_elems = LDS * Dim;
  const double *cursor = A;
  for (uint64_t remaining = n_elems; remaining > 0; remaining--) {
    printf("%f\n", *cursor);
    cursor++;
  }
  printf("\n");
}
/*
 * Squared Frobenius norm: the sum of the squares of all LDS * Dim stored
 * elements of A, accumulated in storage order.
 */
double frobenius_norm2(double *A, const uint64_t LDS, const uint64_t Dim) {
  const uint64_t n_elems = LDS * Dim;
  double acc = 0;
  uint64_t idx = 0;
  while (idx < n_elems) {
    const double value = A[idx];
    acc += value * value;
    idx++;
  }
  return acc;
}
/*
 * Frobenius norm of A: the square root of frobenius_norm2() taken over all
 * LDS * Dim stored elements.
 */
double frobenius_norm(double *A, const uint64_t LDS, const uint64_t Dim) {
  return sqrt(frobenius_norm2(A, LDS, Dim));
}
/*
 * Max norm of A: the largest absolute value among the LDS * Dim stored
 * elements. Returns -1.0 as soon as a NaN is met, so a negative result
 * flags an invalid matrix. An empty matrix yields 0.
 */
double max_norm(double *A, const uint64_t LDS, const uint64_t Dim) {
  const double *const end = A + LDS * Dim;
  double peak = 0;
  for (const double *p = A; p != end; ++p) {
    if (*p != *p) return -1.0; // only a NaN differs from itself
    const double magnitude = fabs(*p);
    if (magnitude > peak) peak = magnitude;
  }
  return peak;
}
/*
 * Condition number estimate in the Frobenius norm: ||A||_F * ||Ainv||_F.
 * Both matrices are read as LDS * Dim stored elements.
 */
double condition_number(double *A, double *Ainv, const uint64_t LDS, const uint64_t Dim) {
  const double magnitude_direct = fabs(frobenius_norm(A, LDS, Dim));
  const double magnitude_inverse = fabs(frobenius_norm(Ainv, LDS, Dim));
  return magnitude_direct * magnitude_inverse;
}
/*
 * Read the whole dataset `key` of the open HDF5 file `file_id` into `data`.
 *
 * `data` must be large enough for the complete dataset; elements are
 * converted to 64-bit unsigned integers. The memory type is
 * H5T_NATIVE_UINT64 so that it always matches the uint64_t buffer
 * (H5T_NATIVE_ULONG is only 64 bits wide on LP64 platforms).
 *
 * Any HDF5 failure aborts through assert(); the string in each assertion
 * names the call that failed.
 */
void read_uint(hid_t file_id, const char *key, uint64_t *data) {
  herr_t rc;
  hid_t dataset_id = H5Dopen2(file_id, key, H5P_DEFAULT);
  assert(dataset_id >= 0 && "H5Dopen2");
  rc = H5Dread(dataset_id, H5T_NATIVE_UINT64, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
  assert(rc >= 0 && "H5Dread");
  rc = H5Dclose(dataset_id);
  assert(rc >= 0 && "H5Dclose");
}
/*
 * Read the whole dataset `key` of the open HDF5 file `file_id` into `data`
 * as native doubles. `data` must be large enough for the complete dataset.
 *
 * Any HDF5 failure aborts through assert(); the string in each assertion
 * names the call that failed.
 */
void read_double(hid_t file_id, const char *key, double *data) {
  const hid_t dataset_id = H5Dopen2(file_id, key, H5P_DEFAULT);
  assert(dataset_id >= 0 && "H5Dopen2");

  herr_t rc = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL,
                      H5P_DEFAULT, data);
  assert(rc >= 0 && "H5Dread");

  rc = H5Dclose(dataset_id);
  assert(rc >= 0 && "H5Dclose");
}
/*
 * Apply the updates to the Slater matrix itself (not to its inverse).
 *
 * Update i (the first Dim values of row i of Updates, row stride LDS) is
 * added element-wise to the Dim consecutive elements of Slater starting at
 * (Updates_index[i] - 1) * Dim. Updates_index is 1-based; Slater uses a
 * stride of Dim.
 */
void update_slater_matrix(const uint64_t LDS, const uint64_t Dim,
                          const uint64_t N_updates, const double *Updates,
                          const uint64_t *Updates_index, double *Slater) {
  for (uint32_t upd = 0; upd < N_updates; upd++) {
    const uint32_t target = Updates_index[upd] - 1;
    const uint64_t dst_base = target * Dim;
    const uint64_t src_base = upd * LDS;
    for (uint32_t k = 0; k < Dim; k++) {
      Slater[dst_base + k] += Updates[src_base + k];
    }
  }
}
/*
 * Check that Slater * Slater_invT equals the identity within `tolerance`.
 *
 * Slater is read with a row stride of Dim, Slater_invT with a row stride of
 * LDS. Returns 0 when every diagonal element of the product is within
 * `tolerance` of 1 and every off-diagonal element within `tolerance` of 0;
 * returns 1 otherwise, including when the product contains a NaN.
 */
uint32_t check_error(const uint64_t LDS, const uint64_t Dim, double *Slater_invT,
                     double *Slater, const double tolerance) {
  double res[Dim * Dim];

  // Product first ...
  for (uint32_t r = 0; r < Dim; r++) {
    const double *lhs_row = &Slater[r * Dim];
    for (uint32_t c = 0; c < Dim; c++) {
      double dot = 0;
      for (uint32_t k = 0; k < Dim; k++) {
        dot += lhs_row[k] * Slater_invT[k * LDS + c];
      }
      res[r * Dim + c] = dot;
    }
  }

  // ... then compare it element by element with the identity.
  for (uint32_t r = 0; r < Dim; r++) {
    for (uint32_t c = 0; c < Dim; c++) {
      const double value = res[r * Dim + c];
      if (value != value) return 1; // found a NaN!
      const double expected = (r == c) ? 1.0 : 0.0;
      if (fabs(value - expected) > tolerance) return 1;
    }
  }
  return 0;
}
/*
 * prod = a * b for Dim x Dim matrices.
 *
 * a and prod use a row stride of Dim, b uses a row stride of LDS. Each
 * element of prod is zeroed and then accumulated in place.
 */
void matmul(double *a, double *b, double *prod, const uint64_t LDS, const uint64_t Dim) {
  for (uint32_t r = 0; r < Dim; r++) {
    for (uint32_t c = 0; c < Dim; c++) {
      double *cell = &prod[r * Dim + c];
      *cell = 0;
      for (uint32_t k = 0; k < Dim; k++) {
        *cell += a[r * Dim + k] * b[k * LDS + c];
      }
    }
  }
}
/*
 * Classify a max-norm value:
 *   -1  max is negative (max_norm() returns a negative value for a NaN)
 *    0  max is strictly below tolerance
 *    1  otherwise
 */
int32_t check_error_better(const double max, const double tolerance) {
  if (max < 0) {
    return -1; // When max was a NaN
  }
  return (max < tolerance) ? 0 : 1;
}
/*
 * res = a - I for Dim x Dim matrices stored with a row stride of Dim:
 * diagonal elements are reduced by 1.0, the others are copied unchanged.
 */
void residual(double *a, double *res, const uint64_t Dim) {
  for (uint32_t r = 0; r < Dim; r++) {
    for (uint32_t c = 0; c < Dim; c++) {
      const uint64_t pos = r * Dim + c;
      res[pos] = (r == c) ? a[pos] - 1.0 : a[pos];
    }
  }
}
/*
 * Run one update cycle through the kernel selected by version[0], apply the
 * same updates to Slater, and verify that the updated inverse still inverts
 * the updated matrix.
 *
 *   version        "a" Anthony (detupd), "n" naive Sherman-Morrison,
 *                  "s" splitting, "b" blocked; only the first character is
 *                  inspected
 *   LDS, Dim       leading dimension of Slater_inv/Updates, matrix dimension
 *   N_updates      number of updates in Updates / Updates_index
 *   breakdown      threshold handed to the kernel
 *   tolerance      threshold handed to check_error()
 *   Slater         Slater matrix, updated in place
 *   Slater_inv     inverse, updated in place by the kernel
 *   determinant    determinant, updated in place by the kernel
 *
 * Returns the result of check_error(): 0 when the inverse is accurate within
 * `tolerance`, 1 otherwise. A kernel failure is only reported on stdout; its
 * return code is then replaced by the result of the check. For an
 * unrecognised version nothing is done and 0 is returned.
 */
uint32_t test_kernel(char *version, const uint64_t LDS, const uint64_t Dim,
                     const uint64_t N_updates, const double *Updates,
                     const uint64_t *Updates_index, const double breakdown, const double tolerance,
                     double *Slater, double *Slater_inv, double *determinant) {
  uint32_t rc = 0;
  const uint64_t n_elems = LDS * Dim;

  if (version[0] == 'a') { // Anthony
    // Scale the inverse by the determinant before handing it to detupd and
    // scale it back afterwards.
    for (uint64_t i = 0; i < n_elems; i++) Slater_inv[i] *= *determinant;
    for (uint64_t j = 0; j < N_updates; j++) {
      const double *Upds = &Updates[j * LDS];
      const uint64_t *Ui = &Updates_index[j];
      detupd(Dim, LDS, Upds, Ui, Slater_inv, determinant);
      // Test the value, not the pointer: a vanishing determinant is the
      // failure signal here.
      if (*determinant == 0) printf("TEST_KERNEL: det_update21 failed\n");
    }
    for (uint64_t i = 0; i < n_elems; i++) Slater_inv[i] /= *determinant;
    update_slater_matrix(LDS, Dim, N_updates, Updates, Updates_index, Slater);
    rc = check_error(LDS, Dim, Slater_inv, Slater, tolerance);
    if (rc != 0) printf("TEST_KERNEL: check_error failed\n");
  } else if (version[0] == 'n') { // Naive
    rc = qmckl_sherman_morrison(LDS, Dim, N_updates, Updates, Updates_index,
                                breakdown, Slater_inv, determinant);
    if (rc != 0) printf("TEST_KERNEL: qmckl_sherman_morrison failed\n");
    update_slater_matrix(LDS, Dim, N_updates, Updates, Updates_index, Slater);
    rc = check_error(LDS, Dim, Slater_inv, Slater, tolerance);
    if (rc != 0) printf("TEST_KERNEL: check_error failed\n");
  } else if (version[0] == 's') { // Splitting
    rc = qmckl_sherman_morrison_splitting(LDS, Dim, N_updates, Updates,
                                          Updates_index, breakdown, Slater_inv,
                                          determinant);
    if (rc != 0) printf("TEST_KERNEL: qmckl_sherman_morrison_splitting failed\n");
    update_slater_matrix(LDS, Dim, N_updates, Updates, Updates_index, Slater);
    // check_error() takes the inverse (stride LDS) first and the matrix
    // (stride Dim) second.
    rc = check_error(LDS, Dim, Slater_inv, Slater, tolerance);
    if (rc != 0) printf("TEST_KERNEL: check_error failed\n");
  } else if (version[0] == 'b') { // Blocked
    rc = qmckl_sherman_morrison_smw32s(LDS, Dim, N_updates, Updates,
                                       Updates_index, breakdown, Slater_inv,
                                       determinant);
    if (rc != 0) printf("TEST_KERNEL: qmckl_sherman_morrison_smw32s failed\n");
    update_slater_matrix(LDS, Dim, N_updates, Updates, Updates_index, Slater);
    rc = check_error(LDS, Dim, Slater_inv, Slater, tolerance);
    if (rc != 0) printf("TEST_KERNEL: check_error failed\n");
  }
  return rc;
}

View File

@ -0,0 +1,43 @@
#include <math.h>
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "hdf5.h"
#include "kernels.h"
/* Pair of a return code and an error value.
 * NOTE(review): no user of this type is visible here; the exact meaning of
 * the two fields should be confirmed against test.c. */
typedef struct Error {
uint32_t rc;
uint64_t error;
} Error;
/* prod = a * b for Dim x Dim matrices; a and prod use stride Dim, b uses
 * stride LDS. */
void matmul(double *a, double *b, double *prod, const uint64_t LDS, const uint64_t Dim);
/* res = a - I for Dim x Dim matrices with stride Dim. */
void residual(double *a, double *res, const uint64_t Dim);
/* Sum of squares of all LDS * Dim stored elements of A. */
double frobenius_norm2(double *A, const uint64_t LDS, const uint64_t Dim);
/* Print all LDS * Dim stored elements of A, one per line. */
void print_matrix(double *A, const uint64_t LDS, const uint64_t Dim);
/* Square root of frobenius_norm2(). */
double frobenius_norm(double *A, const uint64_t LDS, const uint64_t Dim);
/* Largest absolute element of A, or -1.0 when A contains a NaN. */
double max_norm(double *A, const uint64_t LDS, const uint64_t Dim);
/* Product of the Frobenius norms of A and Ainv. */
double condition_number(double *A, double *Ainv, const uint64_t LDS, const uint64_t Dim);
/* Read the HDF5 dataset `key` into `data`; abort via assert() on failure. */
void read_uint(hid_t file_id, const char *key, uint64_t *data);
void read_double(hid_t file_id, const char *key, double *data);
/*
 * Execute the x86 `rdtsc` instruction and return its result as one 64-bit
 * value: EDX supplies the upper 32 bits, EAX the lower 32 bits.
 */
static __inline__ uint64_t rdtsc(void) {
  unsigned low_half, high_half;
  __asm__ __volatile__("rdtsc" : "=a"(low_half), "=d"(high_half));
  const unsigned long long upper = ((unsigned long long)high_half) << 32;
  return upper | ((unsigned long long)low_half);
}
/* Add each update (row of Updates, stride LDS) to the Dim elements of Slater
 * starting at (Updates_index[i] - 1) * Dim; Updates_index is 1-based. */
void update_slater_matrix(const uint64_t LDS, const uint64_t Dim,
const uint64_t N_updates, const double *Updates,
const uint64_t *Updates_index, double *Slater);
/* Return 0 when Slater * Slater_invT equals the identity within `tolerance`,
 * 1 otherwise (including NaN). Slater_invT uses stride LDS, Slater stride
 * Dim -- mind the argument order. */
uint32_t check_error(const uint64_t LDS, const uint64_t Dim, double *Slater_invT,
double *Slater, const double tolerance);
/* Classify a max-norm value: -1 when negative (NaN marker), 0 when below
 * `tolerance`, 1 otherwise. */
int32_t check_error_better(const double max, const double tolerance);
/* Run the kernel selected by version[0] ('a', 'n', 's' or 'b'), update
 * Slater accordingly and return the result of check_error(). */
uint32_t test_kernel(char *version, const uint64_t LDS, const uint64_t Dim,
const uint64_t N_updates, const double *Updates,
const uint64_t *Updates_index, const double breakdown, const double tolerance,
double *Slater, double *Slater_inv, double *determinant);

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Build the test program against each cycle dataset, run every kernel and
# collect the results in an output directory, then run get_stats.m there.
#
# Usage: <this script> <output directory>
#
# cycles.h is a symlink that selects the dataset compiled into ./test, so the
# program is rebuilt every time the link changes.

INDIR="cycles_329_dets"
# INDIR="cycles_15784_dets"
OUTDIR=$1

# Without an output directory the redirections below would expand to
# "/ANTHONY.dat" etc. and target the file system root.
if [ -z "${OUTDIR}" ]; then
  echo "Usage: $0 <output directory>" >&2
  exit 1
fi

mkdir -v "${OUTDIR}"
cp -av get_stats.m "${OUTDIR}"

## All cycles
ln -svf "${INDIR}/all_cycles.h" cycles.h
make clean && make
./test a > "${OUTDIR}/ANTHONY.dat"
./test n > "${OUTDIR}/NAIVE.dat"
./test l > "${OUTDIR}/LATER.dat"
./test s > "${OUTDIR}/SPLITTING.dat"
./test b > "${OUTDIR}/BLOCKED.dat"
./test m > "${OUTDIR}/MKL_LAPACK.dat"

## Cycles w/ 2 upds excl. w/ WB2
ln -svf "${INDIR}/2_cycles.h" cycles.h
make clean && make
./test 2 > "${OUTDIR}/WB2.dat"

## Cycles w/ 3 upds excl. w/ WB3
ln -svf "${INDIR}/3_cycles.h" cycles.h
make clean && make
./test 3 > "${OUTDIR}/WB3.dat"

make clean
rm cycles.h
(cd "${OUTDIR}" && ./get_stats.m)

View File

@ -0,0 +1,632 @@
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include "kernels.h"
extern uint64_t n_splits;
extern uint64_t block_fail;
extern uint64_t recursive_calls;
/*
 * Naive Sherman-Morrison kernel: applies N_updates rank-1 updates to
 * Slater_inv, one after the other.
 *
 *   vLDS, vDim     not read; the sizes are fixed to LDS = 24 and Dim = 21
 *   Updates        N_updates rows of LDS doubles
 *   Updates_index  1-based index of the row of Slater_inv tied to each update
 *   breakdown      smallest accepted |denominator|
 *   Slater_inv     Dim x LDS inverse, updated in place
 *   determinant    multiplied by every denominator; may be NULL, in which
 *                  case no determinant is tracked
 *
 * Returns 0 on success. Returns 1 as soon as a denominator is smaller in
 * magnitude than `breakdown`; the updates applied before that point are NOT
 * rolled back.
 */
uint32_t qmckl_sherman_morrison(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant) {
  const uint32_t Dim = 21;
  const uint32_t LDS = 24;

  double __attribute__((aligned(8))) C[Dim];
  double __attribute__((aligned(8))) D[LDS];

  // For each update
  for (uint32_t l = 0; l < N_updates; l++) {
    // C = S^{-1} x u_l
    for (uint32_t i = 0; i < Dim; i++) {
      C[i] = 0.0;
#pragma ivdep
#pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        // regular mat-vec product, but actually working on S_inv^T * U_l.
        C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
      }
    }

    // Denominator: v_l^T * C
    const int cui = Updates_index[l] - 1;
    const double den = 1.0 + C[cui];
    if (fabs(den) < breakdown) {
      return 1;
    }
    const double iden = 1.0 / den;

    // Update det(A), but only when the caller asked for it.
    if (determinant != NULL) {
      *determinant *= den;
    }

    // D = row cui of S_inv, saved before that row is overwritten below
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t j = 0; j < LDS; j++) {
      D[j] = Slater_inv[cui * LDS + j];
    }

    // A^{-1} = A^{-1} - C x D / den
    for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep
#pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        const double update = C[i] * D[j] * iden;
        Slater_inv[i * LDS + j] -= update;
      }
    }
  }
  return 0;
}
/*
 * Woodbury kernel for exactly two rank-1 updates.
 *
 * Computes S^{-1} - C B^{-1} D in place, with
 *   C := S^{-1} U       : Dim x 2
 *   B := 1 + V C        : 2 x 2
 *   D := V S^{-1}       : 2 x LDS  (rows row1 and row2 of Slater_inv)
 *
 *   vLDS, vDim     not read; the sizes are fixed to LDS = 24 and Dim = 21
 *   Updates        two rows of LDS doubles
 *   Updates_index  the two 1-based row indices
 *   breakdown      smallest accepted |det(B)|
 *   Slater_inv     Dim x LDS inverse, updated in place
 *   determinant    multiplied by det(B) when not NULL
 *
 * Returns 1 (leaving Slater_inv and determinant untouched) when
 * |det(B)| < breakdown, 0 otherwise.
 */
uint32_t qmckl_woodbury_2(const uint64_t vLDS, const uint64_t vDim,
                          const double *__restrict __attribute__((aligned(8)))
                              Updates,
                          const uint64_t *__restrict Updates_index,
                          const double breakdown,
                          double *__restrict __attribute__((aligned(8)))
                              Slater_inv,
                          double *__restrict determinant) {
  const uint32_t Dim = 21;
  const uint32_t LDS = 24;

  const uint32_t row1 = (Updates_index[0] - 1);
  const uint32_t row2 = (Updates_index[1] - 1);

  // C = S^{-1} U, stored row-major as Dim x 2
  double __attribute__((aligned(8))) C[2 * Dim];
  for (uint32_t r = 0; r < Dim; r++) {
    double *c_row = &C[r * 2];
    c_row[0] = 0;
    c_row[1] = 0;
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t k = 0; k < LDS; k++) {
      c_row[0] += Slater_inv[r * LDS + k] * Updates[k];
      c_row[1] += Slater_inv[r * LDS + k] * Updates[LDS + k];
    }
  }

  // B = 1 + V C: pick rows row1 and row2 of C and add the identity
  const double b00 = C[row1 * 2] + 1;
  const double b01 = C[row1 * 2 + 1];
  const double b10 = C[row2 * 2];
  const double b11 = C[row2 * 2 + 1] + 1;

  // B must be safely invertible
  const double det = b00 * b11 - b01 * b10;
  if (fabs(det) < breakdown) {
    return 1;
  }

  // Track det(S) for callers that asked for it
  if (determinant != NULL) {
    *determinant *= det;
  }

  // Closed-form inverse of the 2 x 2 matrix B
  const double idet = 1.0 / det;
  double __attribute__((aligned(8))) Binv[4];
  Binv[0] = idet * b11;
  Binv[1] = -1.0 * idet * b01;
  Binv[2] = -1.0 * idet * b10;
  Binv[3] = idet * b00;

  // BinvD = B^{-1} D : 2 x LDS, built from the not yet updated rows
  double __attribute__((aligned(8))) BinvD[2 * LDS];
  double *__restrict old_row1 = &(Slater_inv[row1 * LDS]);
  double *__restrict old_row2 = &(Slater_inv[row2 * LDS]);
#pragma ivdep
#pragma vector aligned, novecremainder
  for (uint32_t j = 0; j < LDS; j++) {
    BinvD[j] = Binv[0] * old_row1[j] + Binv[1] * old_row2[j];
    BinvD[LDS + j] = Binv[2] * old_row1[j] + Binv[3] * old_row2[j];
  }

  // S^{-1} -= C * BinvD, one contribution at a time
  for (uint32_t r = 0; r < Dim; r++) {
    const double c0 = C[r * 2];
    const double c1 = C[r * 2 + 1];
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t j = 0; j < LDS; j++) {
      Slater_inv[r * LDS + j] -= c0 * BinvD[j];
      Slater_inv[r * LDS + j] -= c1 * BinvD[LDS + j];
    }
  }

  return 0;
}
/*
 * Woodbury kernel for exactly three rank-1 updates, applied to Slater_inv in
 * place.
 *
 * vLDS and vDim are not read: the sizes are fixed to LDS = 24 and Dim = 21.
 * Updates holds three rows of LDS doubles, Updates_index the three 1-based
 * row indices. `determinant` is multiplied by det(B) when it is not NULL.
 *
 * Returns 1 (leaving Slater_inv and determinant untouched) when
 * |det(B)| < breakdown, 0 otherwise.
 */
uint32_t qmckl_woodbury_3(const uint64_t vLDS, const uint64_t vDim,
const double *__restrict __attribute__((aligned(8)))
Updates,
const uint64_t *__restrict Updates_index,
const double breakdown,
double *__restrict __attribute__((aligned(8)))
Slater_inv,
double *__restrict determinant) {
const uint32_t Dim = 21;
const uint32_t LDS = 24;
/*
COMPUTE (S^T)^{-1} - CB^{-1}D : Dim x LDS,
where S^T : Dim x LDS,
C := (S^T)^{-1}U : Dim x 3,
B := 1 + VC : 3 x 3,
D := V(S^T)^{-1} : 3 x LDS,
U : LDS x 3,
V : 3 x Dim
*/
// Convert the 1-based update indices to 0-based row numbers
const uint32_t row1 = (Updates_index[0] - 1);
const uint32_t row2 = (Updates_index[1] - 1);
const uint32_t row3 = (Updates_index[2] - 1);
// Compute C = (S^T)^{-1}U : Dim x 3
double __attribute__((aligned(8))) C[3 * Dim];
for (uint32_t i = 0; i < Dim; i++) {
C[i * 3] = 0;
C[i * 3 + 1] = 0;
C[i * 3 + 2] = 0;
#pragma ivdep
#pragma vector aligned, novecremainder
for (uint32_t k = 0; k < LDS; k++) {
C[i * 3] += Slater_inv[i * LDS + k] * Updates[k];
C[i * 3 + 1] += Slater_inv[i * LDS + k] * Updates[LDS + k];
C[i * 3 + 2] += Slater_inv[i * LDS + k] * Updates[2 * LDS + k];
}
}
// Equivalent BLAS call, kept for reference:
// double alpha = 1.0, beta = 0.0;
// cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
// Dim, 3, LDS, alpha, Slater_inv, LDS, Updates, LDS, beta,
// C, 3);
// Compute B = 1 + VC : 3 x 3
// (rows row1, row2, row3 of C plus the identity, stored row-major as B0..B8)
const double B0 = C[row1 * 3] + 1;
const double B1 = C[row1 * 3 + 1];
const double B2 = C[row1 * 3 + 2];
const double B3 = C[row2 * 3];
const double B4 = C[row2 * 3 + 1] + 1;
const double B5 = C[row2 * 3 + 2];
const double B6 = C[row3 * 3];
const double B7 = C[row3 * 3 + 1];
const double B8 = C[row3 * 3 + 2] + 1;
// Check if determinant of B is not too close to zero
// (cofactor expansion along the first row)
double det;
det = B0 * (B4 * B8 - B5 * B7) - B1 * (B3 * B8 - B5 * B6) +
B2 * (B3 * B7 - B4 * B6);
if (fabs(det) < breakdown) {
return 1;
}
// Update det(Slater) if passed
if (determinant != NULL)
*determinant *= det;
// Compute B^{-1} with explicit formula for 3 x 3 inversion
// (adjugate of B divided by det(B), stored row-major)
double __attribute__((aligned(8))) Binv[9], idet = 1.0 / det;
Binv[0] = (B4 * B8 - B7 * B5) * idet;
Binv[1] = -(B1 * B8 - B7 * B2) * idet;
Binv[2] = (B1 * B5 - B4 * B2) * idet;
Binv[3] = -(B3 * B8 - B6 * B5) * idet;
Binv[4] = (B0 * B8 - B6 * B2) * idet;
Binv[5] = -(B0 * B5 - B3 * B2) * idet;
Binv[6] = (B3 * B7 - B6 * B4) * idet;
Binv[7] = -(B0 * B7 - B6 * B1) * idet;
Binv[8] = (B0 * B4 - B3 * B1) * idet;
// tmp = B^{-1}D : 3 x LDS
// D consists of rows row1..row3 of Slater_inv; they are read here, before
// the final loop overwrites them.
double __attribute__((aligned(8))) tmp[3 * LDS];
double *__restrict r1dim = &(Slater_inv[row1 * LDS]);
double *__restrict r2dim = &(Slater_inv[row2 * LDS]);
double *__restrict r3dim = &(Slater_inv[row3 * LDS]);
#pragma ivdep
#pragma vector aligned, novecremainder
for (uint32_t j = 0; j < LDS; j++) {
tmp[j] = Binv[0] * r1dim[j] + Binv[1] * r2dim[j] + Binv[2] * r3dim[j];
tmp[LDS + j] = Binv[3] * r1dim[j] + Binv[4] * r2dim[j] + Binv[5] * r3dim[j];
tmp[2 * LDS + j] = Binv[6] * r1dim[j] + Binv[7] * r2dim[j] + Binv[8] * r3dim[j];
}
// Compute (S^T)^{-1} - C * tmp : Dim x LDS
for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep
#pragma vector aligned, novecremainder
for (uint32_t j = 0; j < LDS; j++) {
Slater_inv[i * LDS + j] -= C[i * 3] * tmp[j];
Slater_inv[i * LDS + j] -= C[i * 3 + 1] * tmp[LDS + j];
Slater_inv[i * LDS + j] -= C[i * 3 + 2] * tmp[2 * LDS + j];
}
}
return 0;
}
/*
 * One pass of Sherman-Morrison with update splitting.
 *
 * Every update is applied to Slater_inv. When the denominator of an update
 * is smaller in magnitude than `breakdown`, the update is split in two equal
 * halves: the first half is applied now, the second half is appended to
 * later_updates / later_index and *later is incremented, so that the caller
 * can apply it in a following pass.
 *
 *   vLDS, vDim     not read; the sizes are fixed to LDS = 24 and Dim = 21
 *   Updates        N_updates rows of LDS doubles
 *   Updates_index  1-based row index per update
 *   later_updates  room for up to N_updates rows of LDS doubles
 *   later_index    room for up to N_updates indices
 *   later          in/out: number of postponed updates stored so far
 *   determinant    multiplied by every denominator used; may be NULL
 *
 * The global counter n_splits is incremented for every split.
 * Always returns 0.
 */
uint32_t qmckl_slagel_splitting(
    const uint64_t vLDS, const uint64_t vDim, uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict __attribute__((aligned(8))) later_updates,
    uint64_t *__restrict later_index, uint64_t *__restrict later,
    double *__restrict determinant) {
  const uint32_t LDS = 24;
  const uint32_t Dim = 21;

  double __attribute__((aligned(8))) C[Dim];
  double __attribute__((aligned(8))) D[LDS];

  // For each update
  for (uint32_t l = 0; l < N_updates; l++) {
    // C = S^{-1} x U_l
    for (uint32_t i = 0; i < Dim; i++) {
      C[i] = 0.0;
#pragma ivdep
#pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        // regular mat-vec product, but actually working on S_inv^T * U_l.
        C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
      }
    }

    // Denominator
    const int cui = Updates_index[l] - 1;
    double den = 1.0 + C[cui];
    if (fabs(den) < breakdown) { // Here is decided to split the update, or not.
      n_splits += 1;

      // U_l = U_l / 2: save the second half of the update for later ...
#pragma ivdep
#pragma vector aligned, novecremainder
      for (uint32_t i = 0; i < LDS; i++) {
        later_updates[*later * LDS + i] = Updates[l * LDS + i] / 2.0;
      }
      // ... and halve C accordingly. C only has Dim entries, so this loop
      // must not run up to LDS.
      for (uint32_t i = 0; i < Dim; i++) {
        C[i] /= 2.0;
      }
      later_index[*later] = Updates_index[l];
      (*later)++;

      den = 1.0 + C[cui];
    } // From here onwards we continue with applying the first half of the update to Slater_inv
    const double iden = 1.0 / den;

    // Track the determinant only when the caller asked for it.
    if (determinant != NULL) {
      *determinant *= den;
    }

    // D = v^T x S^{-1} : 1 x LDS
#pragma ivdep
#pragma vector aligned, novecremainder
    for (uint32_t j = 0; j < LDS; j++) {
      D[j] = Slater_inv[cui * LDS + j];
    }

    // S^{-1} = S^{-1} - C x D / den
    for (uint32_t i = 0; i < Dim; i++) {
#pragma ivdep
#pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        const double update = C[i] * D[j] * iden;
        Slater_inv[i * LDS + j] -= update;
      }
    }
  }
  return 0;
}
// Sherman-Morrison with update splitting (driver).
//
// Hands all N_updates updates to qmckl_slagel_splitting. Every update that had
// to be split leaves its second half in a local buffer; as long as such halves
// remain, the function calls itself on them.
//
// Parameters:
//   vLDS, vDim     ignored; Dim = 21 and LDS = 24 are hard-wired.
//   N_updates      number of update vectors in Updates.
//   Updates        N_updates vectors of LDS doubles each.
//   Updates_index  1-based index belonging to each update.
//   breakdown      denominator threshold passed on to the splitting kernel.
//   Slater_inv     Dim x LDS matrix, updated in place.
//   determinant    passed through to the splitting kernel.
//
// Side effects: increments the global recursive_calls for every recursive call.
// Returns: always 0.
uint32_t qmckl_sherman_morrison_splitting(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant) {
  const uint32_t Dim = 21;
  const uint32_t LDS = 24;

  // Nothing to apply. Returning here also keeps the variable-length arrays
  // below from being declared with a size of zero, which is undefined.
  if (N_updates == 0) return 0;

  // At most one deferred half per incoming update.
  double __attribute__((aligned(8))) later_updates[LDS * N_updates];
  uint64_t later_index[N_updates];
  uint64_t later = 0;

  // The splitting kernel always returns 0, so its status is not inspected.
  (void) qmckl_slagel_splitting(LDS, Dim, N_updates, Updates, Updates_index,
                                breakdown, Slater_inv, later_updates,
                                later_index, &later, determinant);

  if (later > 0) {
    recursive_calls++;
    (void) qmckl_sherman_morrison_splitting(LDS, Dim, later, later_updates,
                                            later_index, breakdown, Slater_inv,
                                            determinant);
  }

  return 0;
}
// Helper for qmckl_sherman_morrison_smw32s: applies a block of n_block updates
// with qmckl_slagel_splitting and appends the halves it defers behind the
// *later entries already stored in later_updates / later_index.
static void smw32s_send_to_splitting(const uint64_t n_block,
                                     const double *Updates_block,
                                     const uint64_t *Updates_index_block,
                                     const double breakdown,
                                     double *Slater_inv,
                                     double *later_updates,
                                     uint64_t *later_index, uint64_t *later,
                                     double *determinant) {
  const uint32_t Dim = 21;
  const uint32_t LDS = 24;
  uint64_t n_deferred = 0;

  // The splitting kernel always returns 0, so its status is not inspected.
  (void) qmckl_slagel_splitting(LDS, Dim, n_block, Updates_block,
                                Updates_index_block, breakdown, Slater_inv,
                                later_updates + (LDS * *later),
                                later_index + *later, &n_deferred, determinant);
  *later += n_deferred;
}

// Blocked Sherman-Morrison-Woodbury driver ("3-2-splitting").
//
// Strategy:
//   * N_updates == 4: two Woodbury 2x2 blocks.
//   * otherwise: as many Woodbury 3x3 blocks as fit, then either one Woodbury
//     2x2 block (remainder 2) or a single update sent straight to the
//     splitting kernel (remainder 1).
// A Woodbury kernel that returns non-zero is treated as failed: block_fail is
// incremented and the whole block is redone with qmckl_slagel_splitting. All
// halves deferred by the splitting kernel are finally handed to
// qmckl_sherman_morrison_splitting.
// (An analogous 2+2+2 special case for six updates used to be sketched here in
// commented-out form; it is not enabled.)
//
// Parameters:
//   vLDS, vDim     ignored; Dim = 21 and LDS = 24 are hard-wired.
//   N_updates      number of update vectors in Updates.
//   Updates        N_updates vectors of LDS doubles each.
//   Updates_index  1-based index belonging to each update.
//   breakdown      threshold passed on to every kernel.
//   Slater_inv     Dim x LDS matrix, updated in place.
//   determinant    passed through to every kernel.
//
// Side effects: updates the global counters block_fail and recursive_calls.
// Returns: always 0.
uint32_t qmckl_sherman_morrison_smw32s(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant) {
  const uint32_t Dim = 21;
  const uint32_t LDS = 24;

  // Nothing to apply. Returning here also keeps the variable-length arrays
  // below from being declared with a size of zero, which is undefined.
  if (N_updates == 0) return 0;

  // At most one deferred half per incoming update.
  double __attribute__((aligned(8))) later_updates[LDS * N_updates];
  uint64_t later_index[N_updates];
  uint64_t later = 0;
  uint32_t rc;

  if (N_updates == 4) {
    // Special case for 4 rank-1 updates: 2+2
    for (uint32_t b = 0; b < 2; b++) {
      const double *Updates_2block = &Updates[2 * b * LDS];
      const uint64_t *Updates_index_2block = &Updates_index[2 * b];

      rc = qmckl_woodbury_2(LDS, Dim, Updates_2block, Updates_index_2block,
                            breakdown, Slater_inv, determinant);
      if (rc != 0) { // Send the entire block to slagel_splitting
        block_fail += 1;
        smw32s_send_to_splitting(2, Updates_2block, Updates_index_2block,
                                 breakdown, Slater_inv, later_updates,
                                 later_index, &later, determinant);
      }
    }
  } else {
    // Apply the first 3 * n_of_3blocks updates in blocks of 3 updates with the
    // Woodbury 3x3 kernel.
    const uint32_t n_of_3blocks = N_updates / 3;
    const uint32_t remainder = N_updates % 3;
    const uint32_t length_3block = 3 * LDS;

    for (uint32_t i = 0; i < n_of_3blocks; i++) {
      const double *Updates_3block = &Updates[i * length_3block];
      const uint64_t *Updates_index_3block = &Updates_index[i * 3];

      rc = qmckl_woodbury_3(LDS, Dim, Updates_3block, Updates_index_3block,
                            breakdown, Slater_inv, determinant);
      if (rc != 0) { // Send the entire block to slagel_splitting
        block_fail += 1;
        smw32s_send_to_splitting(3, Updates_3block, Updates_index_3block,
                                 breakdown, Slater_inv, later_updates,
                                 later_index, &later, determinant);
      }
    }

    // Whatever is left over starts right behind the last 3-block.
    const double *Updates_rest = &Updates[n_of_3blocks * length_3block];
    const uint64_t *Updates_index_rest = &Updates_index[3 * n_of_3blocks];

    if (remainder == 2) {
      // Apply the last remaining block of 2 updates with the Woodbury 2x2 kernel
      rc = qmckl_woodbury_2(LDS, Dim, Updates_rest, Updates_index_rest,
                            breakdown, Slater_inv, determinant);
      if (rc != 0) { // Send the entire block to slagel_splitting
        block_fail += 1;
        smw32s_send_to_splitting(2, Updates_rest, Updates_index_rest,
                                 breakdown, Slater_inv, later_updates,
                                 later_index, &later, determinant);
      }
    } else if (remainder == 1) {
      // Apply the last remaining update with slagel_splitting. This is the
      // regular path for a single update, so it does not count as a block
      // failure.
      smw32s_send_to_splitting(1, Updates_rest, Updates_index_rest, breakdown,
                               Slater_inv, later_updates, later_index, &later,
                               determinant);
    }
  }

  // Apply the halves that were deferred by the splitting kernel.
  if (later > 0) {
    recursive_calls++;
    (void) qmckl_sherman_morrison_splitting(LDS, Dim, later, later_updates,
                                            later_index, breakdown, Slater_inv,
                                            determinant);
  }

  return 0;
}
// Sherman Morrison, leaving zero denominators for later.
//
// Applies the updates one by one. An update whose denominator
// den = 1 + C[cui] satisfies |den| < breakdown is not applied but copied to a
// local "later" buffer. When the pass is over, the deferred updates are
// retried with a recursive call.
//
// Parameters:
//   vLDS, vDim     ignored; Dim = 21 and LDS = 24 are hard-wired.
//   N_updates      number of update vectors in Updates.
//   Updates        N_updates vectors of LDS doubles each.
//   Updates_index  1-based index per update; selects row (index - 1) of the
//                  Slater_inv storage.
//   breakdown      threshold on |den| below which an update is deferred.
//   Slater_inv     Dim x LDS matrix, updated in place.
//   determinant    optional; when non-NULL it is multiplied by the denominator
//                  of every update that is applied.
//
// Side effects: increments the global recursive_calls for every recursive call.
// Returns: 1 when every update of this call had to be deferred, 0 otherwise.
//          The status of the recursive call is deliberately discarded.
uint32_t qmckl_sherman_morrison_later(
    const uint64_t vLDS, const uint64_t vDim, const uint64_t N_updates,
    const double *__restrict __attribute__((aligned(8))) Updates,
    const uint64_t *__restrict Updates_index, const double breakdown,
    double *__restrict __attribute__((aligned(8))) Slater_inv,
    double *__restrict determinant) {
  const uint32_t Dim = 21;
  const uint32_t LDS = 24;

  double __attribute__((aligned(8))) C[Dim];
  double __attribute__((aligned(8))) D[LDS];

  double __attribute__((aligned(8))) later_updates[LDS * N_updates];
  uint64_t later_index[N_updates];
  uint64_t later = 0;

  uint32_t l = 0;
  // For each update
  while (l < N_updates) {
    // C = A^{-1} x U_l
    for (uint32_t i = 0; i < Dim; i++) {
      C[i] = 0.0;
      #pragma ivdep
      #pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        // Regular mat-vec product, but actually working on S_inv^T * U_l.
        C[i] += Slater_inv[i * LDS + j] * Updates[l * LDS + j];
      }
    }

    // Denominator
    const int cui = Updates_index[l] - 1;
    const double den = 1.0 + C[cui];
    if (fabs(den) < breakdown) {
      // Too close to singular: keep the full (LDS-long) update for later.
      #pragma ivdep
      #pragma vector aligned, novecremainder
      for (uint32_t i = 0; i < LDS; i++) {
        later_updates[later * LDS + i] = Updates[l * LDS + i];
      }
      later_index[later] = Updates_index[l];
      later++;
      l += 1;
      continue;
    }
    const double iden = 1.0 / den;

    // The determinant is optional: only touch it when the caller supplied one.
    if (determinant) *determinant *= den;

    // D = v^T x A^{-1}
    #pragma ivdep
    #pragma vector aligned, novecremainder
    for (uint32_t j = 0; j < LDS; j++) {
      D[j] = Slater_inv[cui * LDS + j];
    }

    // S^{-1} = S^{-1} - C x D / den
    for (uint32_t i = 0; i < Dim; i++) {
      #pragma ivdep
      #pragma vector aligned, novecremainder
      for (uint32_t j = 0; j < LDS; j++) {
        const double update = C[i] * D[j] * iden;
        Slater_inv[i * LDS + j] -= update;
      }
    }

    l += 1;
  }

  if (later == N_updates) { // If all the updates have failed, exit early with an error
    return 1;
  }
  else if (later > 0) { // If some have failed, make a recursive call
    recursive_calls++;
    (void) qmckl_sherman_morrison_later(LDS, Dim, later, later_updates,
                                        later_index, breakdown, Slater_inv, determinant);
  }

  return 0;
}
// In-place inverse of the n x n row-major matrix a, computed with LAPACKE:
// an LU factorisation (dgetrf) followed by the inversion itself (dgetri).
// The leading dimension passed to both routines is n.
// returns:
//   ret = 0 on success
//   ret < 0 illegal argument value
//   ret > 0 singular matrix
// The value is whatever LAPACKE_dgetrf reports when the factorisation fails,
// otherwise the result of LAPACKE_dgetri.
lapack_int inverse(double *a, uint64_t m, uint64_t n) {
  // LAPACKE takes the pivot indices as lapack_int, which is not guaranteed to
  // be a plain int, so the scratch array is declared with that type.
  lapack_int ipiv[m + 1];

  lapack_int ret = LAPACKE_dgetrf(LAPACK_ROW_MAJOR, m, n, a, n, ipiv);
  if (ret != 0) return ret;

  return LAPACKE_dgetri(LAPACK_ROW_MAJOR, n, a, n, ipiv);
}

View File

@ -0,0 +1,312 @@
#include "meuk.h"
#include "cycles.h"
#include <stdint.h>
// HDF5 file that main() opens read-only and reads every update cycle from.
#define DATASET "dataset_329d_zeropadded_cm.hdf5"
// #define DATASET "dataset_15784d_zeropadded_cm.hdf5"
// Number of times main() re-runs the selected kernel for each update cycle;
// the cycle count and the counters below are divided by it before printing.
#define REPETITIONS 100000
// Event counters, reset by main() at the start of every update cycle.
// Number of updates that were split in two.
uint64_t n_splits;
// Number of Woodbury blocks that failed and were redone with the splitting kernel.
uint64_t block_fail;
// Number of recursive kernel calls made to apply deferred updates.
uint64_t recursive_calls;
// Test driver: for every update cycle stored in DATASET, apply the cycle's
// updates to the stored Slater inverse with the kernel selected by the first
// character of argv[1], REPETITIONS times, and print one table row with error
// flags, matrix norms and averaged CPU cycle counts.
//
// Kernel selection (argv[1][0]):
//   a  detupd, one update at a time        n  qmckl_sherman_morrison
//   l  qmckl_sherman_morrison_later        2  qmckl_woodbury_2
//   3  qmckl_woodbury_3                    s  qmckl_sherman_morrison_splitting
//   b  qmckl_sherman_morrison_smw32s       m  LAPACK inverse of the updated matrix
//
// Returns 0 on success, 1 when the dataset cannot be opened, memory runs out
// or the requested kernel is unknown.
int main(int argc, char **argv) {
  assert(argc == 2);
  char *version = argv[1];

  // SETUP STORAGE AND DATA ACCESS
  hid_t file_id = H5Fopen(DATASET, H5F_ACC_RDONLY, H5P_DEFAULT);
  if (file_id < 0) {
    fprintf(stderr, "Could not open dataset '%s'\n", DATASET);
    return 1;
  }

  // Dataset paths; sized generously and always written with snprintf so a
  // long cycle number cannot overflow them.
  char nupds_key[64];
  char upd_idx_key[64];
  char upds_key[64];
  char slater_key[64];
  char slater_inv_key[64];
  char det_key[64];

  const uint64_t Dim = 21;
  const uint64_t LDS = 24;
  uint64_t N_updates;
  double Slater[LDS * Dim], SlaterT[LDS * Dim];
  double Slater_invT[LDS * Dim], Slater_invT_copy[LDS * Dim];
  double determinant, determinant_copy;

  // SETUP TEST PARAMETERS
  const double breakdown = 0.001; // default = 0.001. 1e-9 might be too small
  const double tolerance = 0.001; // default = 0.001
  double cumulative = 0;

  // Table header, printed before and after the results.
  static const char table_rule[] =
      "#----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n";
  static const char table_columns[] =
      "#1\t2\t3\t4\t\t5\t6\t\t7\t\t8\t\t9\t\t10\t\t11\t\t12\t\t13\t\t14\n"
      "#CYCLE\tUPDS\tERR_IN\tERR_BREAK\tERR_OUT\tSPLITS\t\tBLK_FAILS\tMAX\t\tFROB\t\tCOND\t\tCPU_CYC\t\tCPU_CYC/UPD\tCUMUL\t\tREC\n";

  printf("%s%s%s", table_rule, table_columns, table_rule);

  // FOR EACH UPDATE CYCLE DO:
  // (To debug a single cycle, restrict this loop to that cycle's index.)
  for (uint32_t cycles_index = 0; cycles_index < n_cycles; cycles_index++) {

    // 1. READ DATA FROM DATASET
    uint32_t cycle = cycles[cycles_index];
    snprintf(nupds_key, sizeof nupds_key, "/cycle_%d/nupdates", cycle);
    snprintf(upd_idx_key, sizeof upd_idx_key, "/cycle_%d/col_update_index", cycle);
    snprintf(upds_key, sizeof upds_key, "/cycle_%d/updates", cycle);
    snprintf(slater_key, sizeof slater_key, "/cycle_%d/slater_matrix", cycle);
    snprintf(slater_inv_key, sizeof slater_inv_key, "/cycle_%d/slater_inverse_t", cycle);
    snprintf(det_key, sizeof det_key, "/cycle_%d/determinant", cycle);

    read_uint(file_id, nupds_key, &N_updates);

    uint64_t *Updates_index = malloc(N_updates * sizeof *Updates_index);
    double *Updates = malloc(LDS * N_updates * sizeof *Updates);
    if (N_updates > 0 && (Updates_index == NULL || Updates == NULL)) {
      fprintf(stderr, "Out of memory while reading cycle %u\n", (unsigned) cycle);
      free(Updates_index);
      free(Updates);
      (void) H5Fclose(file_id);
      return 1;
    }

    read_uint(file_id, upd_idx_key, Updates_index);
    read_double(file_id, upds_key, Updates);
    read_double(file_id, slater_key, Slater);
    read_double(file_id, slater_inv_key, Slater_invT);
    read_double(file_id, det_key, &determinant);

    // Compute transpose of S. ST: 24 x 21
    for (uint64_t i = 0; i < LDS; i++) {
      for (uint64_t j = 0; j < Dim; j++) {
        SlaterT[i * Dim + j] = Slater[j * LDS + i];
      }
    }

    // Convert repl. upds into additive upds.
    for (uint64_t i = 0; i < N_updates; i++) {
      const uint64_t col = Updates_index[i] - 1;
      for (uint64_t j = 0; j < LDS; j++) {
        Updates[i * LDS + j] -= SlaterT[col + j * Dim];
      }
    }

    // 2. CHECK ERROR ON THE INPUT DATA AND RECORD RESULT: ERR_INPUT
    uint32_t err_inp = check_error(LDS, Dim, Slater_invT, SlaterT, tolerance);

    // Update Slater matrix
    for (uint64_t i = 0; i < N_updates; i++) {
      const uint64_t col = Updates_index[i] - 1;
      for (uint64_t j = 0; j < Dim; j++) {
        SlaterT[col + j * Dim] += Updates[i * LDS + j];
      }
    } // At this point SlaterT, Updates & the updated SlaterT are correct. Checked in GDB

    int32_t err_break = 0;

    // 3. SET TIME- AND SPLIT ACCUMULATOR TO ZERO
    double accumulator = 0;
    double cycles_per_update = 0;
    n_splits = 0;
    block_fail = 0;
    recursive_calls = 0;

    // ## FOR A SET NUMBER OF REPETITIONS DO:
    for (int rep = 0; rep < REPETITIONS; rep++) {

      // 1. MAKE A FRESH COPY OF THE SLATER INVERSE AND DETERMINANT AND USE THE COPY
      // Every kernel below works on Slater_invT_copy and determinant_copy, so
      // each repetition starts from the data read from the file.
      memcpy(Slater_invT_copy, Slater_invT, LDS * Dim * sizeof(double));
      determinant_copy = determinant;

      // ### CHOOSE A KERNEL:
      switch (version[0]) {

      case 'a': { // Anthony
        double determinant_previous = determinant_copy;
        err_break = 0;

        // Multiply inv(Slater-mat) by det(Slater-mat) to get adj(Slater_mat)
        for (uint64_t i = 0; i < LDS * Dim; i++) Slater_invT_copy[i] *= determinant_copy;

        for (uint64_t i = 0; i < N_updates; i++) {
          const double *Upds = &Updates[i * LDS];
          const uint64_t *Ui = &Updates_index[i];
          determinant_previous = determinant_copy;

          // 1. FETCH START TIME
          uint64_t before = rdtsc();
          // 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
          detupd(Dim, LDS, Upds, Ui, Slater_invT_copy, &determinant_copy);
          // 3. FETCH FINISH TIME
          uint64_t after = rdtsc();
          // 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
          accumulator += (double)(after - before);

          // 5. STOP APPLYING UPDATES IF BREAKDOWN DETECTED
          double lambda = determinant_copy / determinant_previous; // should be id. to lambda in detupd
          if (fabs(lambda) < breakdown) {
            err_break = 1;
            break; // leaves the update loop only
          }
        }

        if (err_break == 1) { // Divide adj(Slater-mat) by OLD det(Slater-mat) to get inv(Slater_mat) again
          for (uint64_t i = 0; i < LDS * Dim; i++) Slater_invT_copy[i] /= determinant_previous;
        } else { // Divide adj(Slater-mat) by NEW det(Slater-mat) to get inv(Slater_mat) again
          for (uint64_t i = 0; i < LDS * Dim; i++) Slater_invT_copy[i] /= determinant_copy;
        }
        break;
      }

      case 'n': { // Naive
        // 1. FETCH START TIME
        uint64_t before = rdtsc();
        // 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
        err_break = qmckl_sherman_morrison(LDS, Dim, N_updates, Updates,
            Updates_index, breakdown, Slater_invT_copy, &determinant_copy);
        // 3. FETCH FINISH TIME
        uint64_t after = rdtsc();
        // 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
        accumulator += (double)(after - before);
        break;
      }

      case 'l': { // Later
        uint64_t before = rdtsc();
        err_break = qmckl_sherman_morrison_later(LDS, Dim, N_updates, Updates,
            Updates_index, breakdown, Slater_invT_copy, &determinant_copy);
        uint64_t after = rdtsc();
        accumulator += (double)(after - before);
        break;
      }

      case '2': { // by twos
        uint64_t before = rdtsc();
        err_break = qmckl_woodbury_2(LDS, Dim, Updates, Updates_index,
            breakdown, Slater_invT_copy, &determinant_copy);
        uint64_t after = rdtsc();
        accumulator += (double)(after - before);
        break;
      }

      case '3': { // by threes
        uint64_t before = rdtsc();
        err_break = qmckl_woodbury_3(LDS, Dim, Updates, Updates_index,
            breakdown, Slater_invT_copy, &determinant_copy);
        uint64_t after = rdtsc();
        accumulator += (double)(after - before);
        break;
      }

      case 's': { // Splitting
        uint64_t before = rdtsc();
        err_break = qmckl_sherman_morrison_splitting(LDS, Dim, N_updates, Updates,
            Updates_index, breakdown, Slater_invT_copy, &determinant_copy);
        uint64_t after = rdtsc();
        accumulator += (double)(after - before);
        break;
      }

      case 'b': { // Blocked
        uint64_t before = rdtsc();
        err_break = qmckl_sherman_morrison_smw32s(LDS, Dim, N_updates, Updates,
            Updates_index, breakdown, Slater_invT_copy, &determinant_copy);
        uint64_t after = rdtsc();
        accumulator += (double)(after - before);
        break;
      }

      case 'm': { // LAPACK/MKL
        // Only send upper Dim x Dim part of matrix to lapack
        double tmp[Dim * Dim];
        memcpy(tmp, SlaterT, Dim * Dim * sizeof(double));

        // 1. FETCH START TIME
        uint64_t before = rdtsc();
        // 2. EXECUTE KERNEL AND REMEMBER EXIT STATUS
        err_break = inverse(tmp, Dim, Dim);
        // 3. FETCH FINISH TIME
        uint64_t after = rdtsc();

        // Copy elements of inverse back, adding 0-padding in "correct" place
        for (uint32_t i = 0; i < Dim; i++) {
          for (uint32_t j = 0; j < LDS; j++) {
            if (j < Dim) Slater_invT_copy[i * LDS + j] = tmp[i * Dim + j];
            else Slater_invT_copy[i * LDS + j] = 0.0;
          }
        }

        // 4. ADD TIME DIFFERENCE TO TIME CUMMULATOR
        accumulator += (double)(after - before);
        break;
      }

      default: // Exit
        printf("Version '%c' not implemented.\n", version[0]);
        free(Updates_index);
        free(Updates);
        (void) H5Fclose(file_id);
        return 1;
      }

    } // END OF REPETITIONS LOOP

    // 4. COPY RESULT BACK TO ORIGINAL
    memcpy(Slater_invT, Slater_invT_copy, LDS * Dim * sizeof(double));
    determinant = determinant_copy;
    // At this point Slater_invT contains the correct inverse matrix

    // 5. DIVIDE CYCLE- AND SPLIT-ACCUMULATOR BY NUMBER OF REPETITIONS AND RECORD
    //    DIVIDE CYCLE-ACCUMULATOR BY NUMBER OF UPDATES AND RECORD
    accumulator /= REPETITIONS;
    cycles_per_update = accumulator / N_updates;
    n_splits /= REPETITIONS;
    block_fail /= REPETITIONS;
    recursive_calls /= REPETITIONS;

    // 6. ADD THE AVERAGED TIME PER CYCLE OF ACCUMULATER TO
    //    CUMULATIVE RESULT FOR THE ENTIRE DATASET
    cumulative += accumulator;

    double SSi[Dim * Dim];
    matmul(SlaterT, Slater_invT, SSi, LDS, Dim);
    double Res[Dim * Dim];
    residual(SSi, Res, Dim);
    const double max = max_norm(Res, Dim, Dim);

    // 7. CHECK ERRROR ON THE UPDATED DATA AND RECORD THE RESULT: ERR_OUT
    uint32_t err_out = check_error(LDS, Dim, Slater_invT, SlaterT, tolerance);

    // 8. COMPUTE CONDITION NUMBER
    const double condnr = condition_number(Slater, Slater_invT, LDS, Dim);
    const double frob = frobenius_norm(Res, Dim, Dim);

    // 10. WRITE RESULTS TO FILE: CYCLE#, #UPDS, ERR_INP, ERR_BREAK, #SPLITS, ERR_OUT, COND, #CLCK_TCKS
    // The casts make every argument match its conversion specifier exactly.
    printf("%u\t%lu\t%u\t%u\t\t%u\t%lu\t\t%lu\t\t%e\t%e\t%e\t%e\t%e\t%e\t%lu\n",
           cycle, (unsigned long) N_updates, err_inp, (unsigned) err_break,
           err_out, (unsigned long) n_splits, (unsigned long) block_fail, max,
           frob, condnr, accumulator, cycles_per_update, cumulative,
           (unsigned long) recursive_calls);

    free(Updates_index);
    free(Updates);

  } // END OF CYCLE LOOP

  printf("%s%s%s", table_rule, table_columns, table_rule);

  (void) H5Fclose(file_id);
  return 0;
}

View File

@ -0,0 +1,92 @@
#include <assert.h>
#include "data_cm.h"
#include "meuk.h"
#define REPETITIONS 10000000
// Benchmark driver for the fixed datasets in data_cm.h.
//   argv[1]: kernel, selected by its first character
//            (a = detupd, n = naive, s = splitting, b = blocked)
//   argv[2]: dataset, selected by its first character (2, 3 or 5 updates)
// The chosen dataset is first validated with check_residual() and
// test_kernel(); the kernel is then run REPETITIONS times and the average
// number of CPU cycles per repetition is printed.
int main(int argc, char **argv) {
  assert(argc == 3);
  char *kernel_arg = argv[1];
  char *dataset_arg = argv[2];

  const uint64_t Dim = 21;
  const uint64_t LDS = 24;
  // const double breakdown = 1e-3;
  const double breakdown = 1e-9; // this might be too small and cause NIs

  const uint64_t *N_updates;
  const double *Updates;
  const uint64_t *Updates_index;
  double *Slater, *Slater_invT;
  double determinant;
  uint32_t rc;

  // Pick the dataset that matches the requested number of updates.
  switch (dataset_arg[0]) {
  case '2': // 2 Updates
    N_updates = &N_updates2;
    Updates = Updates2;
    Updates_index = Updates_index2;
    Slater = Slater2;
    Slater_invT = Slater_invT2; // Slater_inv in QMC=Chem is actually its transpose
    determinant = determinant2;
    break;
  case '3': // 3 Updates
    N_updates = &N_updates3;
    Updates = Updates3;
    Updates_index = Updates_index3;
    Slater = Slater3;
    Slater_invT = Slater_invT3;
    determinant = determinant3;
    break;
  case '5': // 5 Updates
    N_updates = &N_updates5;
    Updates = Updates5;
    Updates_index = Updates_index5;
    Slater = Slater5;
    Slater_invT = Slater_invT5;
    determinant = determinant5;
    break;
  default: // Exit
    printf("Incorrect number of updates given\n");
    return 1;
  }

  // Sanity checks on the input data and on the kernel itself.
  rc = check_residual(LDS, Dim, Slater_invT, Slater);
  assert(rc == 0 && "check_residual()");

  rc = test_kernel(kernel_arg, LDS, Dim, *N_updates, Updates, Updates_index,
                   breakdown, Slater, Slater_invT, &determinant);
  assert(rc == 0 && "test_kernel()");

  // Timed section: an unrecognised kernel character times an empty section.
  const uint64_t tic = rdtsc();

  switch (kernel_arg[0]) {
  case 'a': // Anthony
    for (int rep = 0; rep < REPETITIONS; rep++) {
      for (uint64_t u = 0; u < *N_updates; u++) {
        detupd(Dim, LDS, &Updates[u * LDS], &Updates_index[u], Slater_invT,
               &determinant);
      }
    }
    break;
  case 'n': // Naive
    for (int rep = 0; rep < REPETITIONS; rep++) {
      rc = qmckl_sherman_morrison(LDS, Dim, *N_updates, Updates,
                                  Updates_index, breakdown, Slater_invT,
                                  &determinant);
      if (rc != 0) printf("qmckl_sherman_morrison failed\n");
    }
    break;
  case 's': // Splitting
    for (int rep = 0; rep < REPETITIONS; rep++) {
      rc = qmckl_sherman_morrison_splitting(LDS, Dim, *N_updates, Updates,
                                            Updates_index, breakdown,
                                            Slater_invT, &determinant);
      if (rc != 0) printf("qmckl_sherman_morrison_splitting failed\n");
    }
    break;
  case 'b': // Blocked
    for (int rep = 0; rep < REPETITIONS; rep++) {
      rc = qmckl_sherman_morrison_smw32s(LDS, Dim, *N_updates, Updates,
                                         Updates_index, breakdown, Slater_invT,
                                         &determinant);
      if (rc != 0) printf("qmckl_sherman_morrison_smw32s failed\n");
    }
    break;
  default:
    break;
  }

  const uint64_t toc = rdtsc();

  printf("cycles = %f\n", ((double)(toc - tic) / (double) REPETITIONS));
}

View File

@ -0,0 +1,62 @@
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// Read the x86 time-stamp counter: the RDTSC instruction leaves the low
// 32 bits in EAX and the high 32 bits in EDX; both are combined into one
// 64-bit value.
static __inline__ uint64_t rdtsc(void) {
  unsigned low_half, high_half;
  __asm__ __volatile__("rdtsc" : "=a"(low_half), "=d"(high_half));
  const uint64_t upper = ((uint64_t)high_half) << 32;
  return upper | (uint64_t)low_half;
}
// Prototypes of the two kernels timed by main() below. Neither is defined in
// this file; both are declared with the same argument list:
//   LDS, Dim, N_updates  sizes (main() passes 24, 21 and 1)
//   Updates              update vector(s)
//   Updates_index        index belonging to each update
//   breakdown            threshold value
//   Slater_inv           matrix handed to the kernel (non-const, so it may be modified)
//   determinant          pointer to the determinant value
// NOTE(review): presumably these must match the definitions linked into this
// benchmark — confirm against the kernel sources.
int qmckl_sherman_morrison(
const uint64_t LDS, const uint64_t Dim, const uint64_t N_updates,
const double *__restrict __attribute__((aligned(8))) Updates,
const uint64_t *__restrict Updates_index, const double breakdown,
double *__restrict __attribute__((aligned(8))) Slater_inv,
double *__restrict determinant);
int detupd(const uint64_t LDS, const uint64_t Dim, const uint64_t N_updates,
const double *__restrict __attribute__((aligned(8))) Updates,
const uint64_t *__restrict Updates_index, const double breakdown,
double *__restrict __attribute__((aligned(8))) Slater_inv,
double *__restrict determinant);
// Number of kernel calls in the timed loop of main().
#define REPETITIONS 100000000
// Micro-benchmark: call one kernel REPETITIONS times on a synthetic single
// update and print the average number of CPU cycles per call.
//   argv[1] starting with 'c' times detupd, anything else times
//   qmckl_sherman_morrison.
int main(int argc, char **argv) {
  assert(argc == 2);
  char *version = argv[1];

  const uint64_t Dim = 21;
  const uint64_t LDS = 24;
  const uint64_t N_updates = 1;
  double Updates[LDS] __attribute__((aligned(8)));
  uint64_t Updates_index[N_updates];
  Updates_index[0] = 1;
  const double breakdown = 1e-3;
  double Slater_inv[LDS * Dim] __attribute__((aligned(8)));
  double determinant = 1.0;

  // Fill the Dim meaningful entries of every LDS-wide row and set the padding
  // columns to zero, so that no element handed to the kernels is left
  // uninitialised.
  for (uint64_t j = 0; j < LDS; j++) {
    Updates[j] = (j < Dim) ? (double)j : 0.0;
  }
  for (uint64_t i = 0; i < Dim; i++) {
    for (uint64_t j = 0; j < LDS; j++) {
      Slater_inv[LDS * i + j] = (j < Dim) ? (double)j : 0.0;
    }
  }

  uint64_t before = rdtsc();

  if (version[0] == 'c') {
    for (int i = 0; i < REPETITIONS; i++) {
      detupd(LDS, Dim, N_updates, Updates, Updates_index, breakdown, Slater_inv,
             &determinant);
    }
  } else {
    for (int i = 0; i < REPETITIONS; i++) {
      qmckl_sherman_morrison(LDS, Dim, N_updates, Updates, Updates_index,
                             breakdown, Slater_inv, &determinant);
    }
  }

  uint64_t after = rdtsc();

  printf("cycles = %f\n", ((double)(after - before) / (double)REPETITIONS));
  return 0;
}

2
qmckl

@ -1 +1 @@
Subproject commit e180354cbc939b709bd7fe97a89953447284196c Subproject commit ed953cf9b6e62b4cbd42f18f073805eb519e80f7