Sherman-Morrison/ci/vfc_ci_report/compare_runs.py

543 lines
18 KiB
Python
Raw Normal View History

# Manage the view comparing a variable over different runs
import time
import pandas as pd
from math import pi
from bokeh.plotting import figure, curdoc
from bokeh.embed import components
from bokeh.models import Select, ColumnDataSource, Panel, Tabs, HoverTool, \
2021-05-03 15:47:44 +02:00
TextInput, CheckboxGroup, TapTool, CustomJS
import helper
import plot
2021-05-03 15:47:44 +02:00
##########################################################################
class CompareRuns:
    """Manage the view comparing one variable over different runs.

    The view is made of a boxplot and three dotplots (sigma, s10, s2), each
    backed by its own ColumnDataSource, plus the widgets used to pick the
    test / variable / backend combination, the number of runs to display and
    whether outliers are filtered.
    """

    # Toolbar shared by every figure of this view
    _PLOT_TOOLS = "pan, wheel_zoom, xwheel_zoom, ywheel_zoom, reset, save"

    # Tooltip entries describing a run, shown first on every plot
    _RUN_TOOLTIPS = (
        ("Git commit", "@is_git_commit"),
        ("Date", "@date"),
        ("Hash", "@hash"),
        ("Author", "@author"),
        ("Message", "@message"),
    )

    # Per-run columns copied as-is into every ColumnDataSource
    _RUN_COLUMNS = (
        "is_git_commit", "date", "hash", "author", "message", "nsamples"
    )

    # Helper functions related to CompareRuns

    def gen_x_series(self, timestamps):
        """Build the x axis labels and tooltip metadata for the last runs.

        ``timestamps`` is expected to be ordered from oldest to newest. Only
        the last ``self.current_n_runs`` entries are used (all of them when
        that value is 0 or larger than the number of timestamps).

        Returns a ``(x_series, x_metadata)`` tuple: the list of run names, and
        a dict of lists (date, is_git_commit, hash, author, message), both in
        the same order as the selected timestamps.
        """
        # Work on a plain list so that positional access does not depend on
        # how the incoming container interprets integer keys
        ordered = list(timestamps)

        # n == 0 means we want all runs, we also make sure not to go out of
        # bound if asked for more runs than we have
        n = self.current_n_runs
        if n == 0 or n > len(ordered):
            n = len(ordered)
        selected = ordered[len(ordered) - n:]

        x_series = []
        x_metadata = dict(
            date=[],
            is_git_commit=[],
            hash=[],
            author=[],
            message=[]
        )

        for timestamp in selected:
            # Metadata is looked up once per run and reused for every column
            row_metadata = helper.get_metadata(self.metadata, timestamp)

            x_series.append(row_metadata["name"])
            x_metadata["date"].append(time.ctime(timestamp))
            for key in ("is_git_commit", "hash", "author", "message"):
                x_metadata[key].append(row_metadata[key])

        return x_series, x_metadata

    # Plots update function

    def update_plots(self):
        """Refresh the four data sources and x ranges from the widgets."""
        # Select all data matching current test/var/backend
        runs = self.data.loc[[self.widgets["select_test"].value],
                             self.widgets["select_var"].value,
                             self.widgets["select_backend"].value]

        # The x labels are generated from the sorted timestamps, so the rows
        # must follow the same order for labels and values to line up
        # (mergesort is stable: rows sharing a timestamp keep their order)
        runs = runs.sort_values(by="timestamp", kind="mergesort")

        x_series, x_metadata = self.gen_x_series(runs["timestamp"])

        # Update source
        main_dict = runs.to_dict("series")
        main_dict["x"] = x_series

        # Add metadata (for tooltip)
        main_dict.update(x_metadata)

        # Select the last n runs only (n == 0 keeps everything, since
        # value[-0:] is the whole sequence)
        n = self.current_n_runs
        main_dict = {key: value[-n:] for key, value in main_dict.items()}

        run_columns = {key: main_dict[key] for key in self._RUN_COLUMNS}
        filter_outliers = bool(
            self.widgets["outliers_filtering_compare"].active)

        # Generate ColumnDataSources for the 3 dotplots
        for stat in ("sigma", "s10", "s2"):
            x_key = stat + "_x"

            stat_data = dict(run_columns)
            stat_data[x_key] = main_dict["x"]
            stat_data[stat] = main_dict[stat]

            if stat in ("s10", "s2"):
                bound_key = stat + "_lower_bound"
                stat_data[bound_key] = main_dict[bound_key]

            # Filter outliers if the box is checked
            # NOTE(review): only the stat and its x column are filtered, the
            # other columns keep their full length - confirm this is intended
            if filter_outliers:
                outliers = helper.detect_outliers(stat_data[stat])
                stat_data[stat] = helper.remove_outliers(
                    stat_data[stat], outliers)
                stat_data[x_key] = helper.remove_outliers(
                    stat_data[x_key], outliers)

            # Assign ColumnDataSource
            self.sources[stat + "_source"].data = stat_data

        # Generate ColumnDataSource for the boxplot
        box_data = dict(run_columns)
        box_columns = (
            "x", "min", "quantile25", "quantile50", "quantile75", "max",
            "mu", "pvalue"
        )
        for key in box_columns:
            box_data[key] = main_dict[key]
        self.sources["boxplot_source"].data = box_data

        # Update x axis of every plot from the x column of its own source
        x_columns = (
            ("boxplot", "boxplot_source", "x"),
            ("sigma_plot", "sigma_source", "sigma_x"),
            ("s10_plot", "s10_source", "s10_x"),
            ("s2_plot", "s2_source", "s2_x"),
        )
        for plot_name, source_name, x_key in x_columns:
            helper.reset_x_range(
                self.plots[plot_name],
                self.sources[source_name].data[x_key]
            )

    # Widgets' callback functions

    def update_test(self, attrname, old, new):
        """Callback for the "value" and "options" of the test selector.

        Refreshes the list of available variables for the new test, then
        makes sure the variable callback runs so that the plots are updated.
        """
        # If the value is updated by the CustomJS,
        # self.widgets["select_test"].value won't be updated, so we have to
        # look for that case and assign it manually

        # "new" should be a list when updated by CustomJS
        if isinstance(new, list):
            # If filtering removed all options, we might have an empty list
            # (in this case, we just skip the callback and do nothing)
            if not new:
                return
            new = new[0]

        if new != self.widgets["select_test"].value:
            # The callback will be triggered again with the updated value
            self.widgets["select_test"].value = new
            return

        # New list of available vars
        self.vars = self.data.loc[new]\
            .index.get_level_values("variable").drop_duplicates().tolist()
        self.widgets["select_var"].options = self.vars

        # Reset var selection if old one is not available in new vars
        if self.widgets["select_var"].value not in self.vars:
            # The update_var callback will be triggered by the assignment
            self.widgets["select_var"].value = self.vars[0]
        else:
            # Trigger the callback manually (since the plots need to be
            # updated anyway)
            self.update_var("", "", self.widgets["select_var"].value)

    def update_var(self, attrname, old, new):
        """Callback for the "value" and "options" of the variable selector.

        Refreshes the list of available backends for the current test and
        variable, then makes sure the backend callback runs so that the plots
        are updated.
        """
        # If the value is updated by the CustomJS,
        # self.widgets["select_var"].value won't be updated, so we have to
        # look for that case and assign it manually

        # "new" should be a list when updated by CustomJS
        if isinstance(new, list):
            # Same guard as in update_test: nothing to select in an empty
            # list of options, so skip the callback instead of failing
            if not new:
                return
            new = new[0]

        if new != self.widgets["select_var"].value:
            # The callback will be triggered again with the updated value
            self.widgets["select_var"].value = new
            return

        # New list of available backends
        self.backends = self.data.loc[
            self.widgets["select_test"].value,
            self.widgets["select_var"].value
        ].index.get_level_values("vfc_backend").drop_duplicates().tolist()
        self.widgets["select_backend"].options = self.backends

        # Reset backend selection if old one is not available in new backends
        if self.widgets["select_backend"].value not in self.backends:
            # The update_backend callback will be triggered by the assignment
            self.widgets["select_backend"].value = self.backends[0]
        else:
            # Trigger the callback manually (since the plots need to be
            # updated anyway)
            self.update_backend("", "", self.widgets["select_backend"].value)

    def update_backend(self, attrname, old, new):
        """Callback for the backend selector."""
        # Simply update plots, since no other data is affected
        self.update_plots()

    def update_n_runs(self, attrname, old, new):
        """Callback for the selector of the number of runs to display."""
        # Simply update runs selection (value and string display)
        self.widgets["select_n_runs"].value = new
        self.current_n_runs = self.n_runs_dict[
            self.widgets["select_n_runs"].value]

        self.update_plots()

    def update_outliers_filtering(self, attrname, old, new):
        """Callback for the "Filter outliers" checkbox."""
        self.update_plots()

    # Bokeh setup functions

    def _new_figure(self, name, title):
        """Create an empty figure with the settings shared by all plots."""
        return figure(
            name=name, title=title,
            plot_width=900, plot_height=400, x_range=[""],
            tools=self._PLOT_TOOLS, sizing_mode="scale_width"
        )

    def setup_plots(self):
        """Create the boxplot and the three dotplots, and add them to the
        document (the s10 and s2 plots are grouped inside tabs)."""
        # Custom JS callback that will be used when tapping on a run
        # Only switches the view, a server callback is required to update
        # plots (defined inside template to avoid bloating server w/ too much
        # JS code)
        js_tap_callback = "goToInspectRuns();"

        run_tooltips = list(self._RUN_TOOLTIPS)
        nsamples_tooltip = ("Number of samples", "@nsamples")

        # Box plot
        self.plots["boxplot"] = self._new_figure(
            "boxplot", "Variable distribution over runs")

        box_tooltips = run_tooltips + [
            ("Min", "@min{%0.18e}"),
            ("Max", "@max{%0.18e}"),
            ("1st quartile", "@quantile25{%0.18e}"),
            ("Median", "@quantile50{%0.18e}"),
            ("3rd quartile", "@quantile75{%0.18e}"),
            ("μ", "@mu{%0.18e}"),
            ("p-value", "@pvalue"),
            nsamples_tooltip
        ]
        box_tooltips_formatters = {
            "@min": "printf",
            "@max": "printf",
            "@quantile25": "printf",
            "@quantile50": "printf",
            "@quantile75": "printf",
            "@mu": "printf"
        }

        plot.fill_boxplot(
            self.plots["boxplot"], self.sources["boxplot_source"],
            tooltips=box_tooltips,
            tooltips_formatters=box_tooltips_formatters,
            js_tap_callback=js_tap_callback,
            server_tap_callback=self.inspect_run_callback_boxplot,
        )
        self.doc.add_root(self.plots["boxplot"])

        # Sigma plot (bar plot)
        self.plots["sigma_plot"] = self._new_figure(
            "sigma_plot", "Standard deviation σ over runs")

        sigma_tooltips = run_tooltips + [
            ("σ", "@sigma"),
            nsamples_tooltip
        ]

        plot.fill_dotplot(
            self.plots["sigma_plot"], self.sources["sigma_source"], "sigma",
            tooltips=sigma_tooltips,
            js_tap_callback=js_tap_callback,
            server_tap_callback=self.inspect_run_callback_sigma,
            lines=True
        )
        self.doc.add_root(self.plots["sigma_plot"])

        # s plot (bar plot with 2 tabs)
        self.plots["s10_plot"] = self._new_figure(
            "s10_plot", "Significant digits s over runs")

        s10_tooltips = run_tooltips + [
            ("s", "@s10"),
            ("s lower bound", "@s10_lower_bound"),
            nsamples_tooltip
        ]

        plot.fill_dotplot(
            self.plots["s10_plot"], self.sources["s10_source"], "s10",
            tooltips=s10_tooltips,
            js_tap_callback=js_tap_callback,
            server_tap_callback=self.inspect_run_callback_s10,
            lines=True,
            lower_bound=True
        )
        s10_tab = Panel(child=self.plots["s10_plot"], title="Base 10")

        self.plots["s2_plot"] = self._new_figure(
            "s2_plot", "Significant digits s over runs")

        s2_tooltips = run_tooltips + [
            ("s", "@s2"),
            ("s lower bound", "@s2_lower_bound"),
            nsamples_tooltip
        ]

        plot.fill_dotplot(
            self.plots["s2_plot"], self.sources["s2_source"], "s2",
            tooltips=s2_tooltips,
            js_tap_callback=js_tap_callback,
            server_tap_callback=self.inspect_run_callback_s2,
            lines=True,
            lower_bound=True
        )
        s2_tab = Panel(child=self.plots["s2_plot"], title="Base 2")

        s_tabs = Tabs(
            name="s_tabs",
            tabs=[s10_tab, s2_tab],
            tabs_location="below"
        )
        self.doc.add_root(s_tabs)

    def setup_widgets(self):
        """Create the selection widgets, register their callbacks and add
        them to the document."""
        # Initial selections

        # Test/var/backend combination (we select all first elements at init)
        self.tests = self.data\
            .index.get_level_values("test").drop_duplicates().tolist()

        self.vars = self.data.loc[self.tests[0]]\
            .index.get_level_values("variable").drop_duplicates().tolist()

        self.backends = self.data.loc[self.tests[0], self.vars[0]]\
            .index.get_level_values("vfc_backend").drop_duplicates().tolist()

        # Custom JS callback that will be used client side to filter
        # selections
        filter_callback_js = """
        selector.options = options.filter(e => e.includes(cb_obj.value));
        """

        # Number of runs to display
        # The dict structure allows us to get int value from the display
        # string in O(1)
        self.n_runs_dict = {
            "Last 3 runs": 3,
            "Last 5 runs": 5,
            "Last 10 runs": 10,
            "All runs": 0
        }

        # Contains all options strings
        n_runs_display = list(self.n_runs_dict.keys())

        # Will be used when updating plots (contains actual number to
        # display)
        self.current_n_runs = self.n_runs_dict[n_runs_display[1]]

        # Test selector widget
        self.widgets["select_test"] = Select(
            name="select_test", title="Test :",
            value=self.tests[0], options=self.tests
        )
        self.doc.add_root(self.widgets["select_test"])
        self.widgets["select_test"].on_change("value", self.update_test)
        self.widgets["select_test"].on_change("options", self.update_test)

        # Filter widget (narrows down the options of the test selector on
        # the client side)
        self.widgets["test_filter"] = TextInput(
            name="test_filter", title="Tests filter:"
        )
        self.widgets["test_filter"].js_on_change(
            "value",
            CustomJS(
                args={
                    "options": self.tests,
                    "selector": self.widgets["select_test"]
                },
                code=filter_callback_js
            )
        )
        self.doc.add_root(self.widgets["test_filter"])

        # Number of runs to display
        self.widgets["select_n_runs"] = Select(
            name="select_n_runs", title="Display :",
            value=n_runs_display[1], options=n_runs_display
        )
        self.doc.add_root(self.widgets["select_n_runs"])
        self.widgets["select_n_runs"].on_change("value", self.update_n_runs)

        # Variable selector widget
        self.widgets["select_var"] = Select(
            name="select_var", title="Variable :",
            value=self.vars[0], options=self.vars
        )
        self.doc.add_root(self.widgets["select_var"])
        self.widgets["select_var"].on_change("value", self.update_var)
        self.widgets["select_var"].on_change("options", self.update_var)

        # Backend selector widget
        self.widgets["select_backend"] = Select(
            name="select_backend", title="Verificarlo backend :",
            value=self.backends[0], options=self.backends
        )
        self.doc.add_root(self.widgets["select_backend"])
        self.widgets["select_backend"].on_change(
            "value", self.update_backend)

        # Outliers filtering checkbox
        self.widgets["outliers_filtering_compare"] = CheckboxGroup(
            name="outliers_filtering_compare",
            labels=["Filter outliers"], active=[]
        )
        self.doc.add_root(self.widgets["outliers_filtering_compare"])
        self.widgets["outliers_filtering_compare"]\
            .on_change("active", self.update_outliers_filtering)

    # Communication methods
    # (to send/receive messages to/from master)

    def inspect_run_callback(self, new, source_name, x_name):
        """Ask the master to switch to the Inspect runs view.

        ``new`` is the list of selected indices; the run name is read at the
        last of them, in column ``x_name`` of source ``source_name``.
        """
        # In case we just unselected everything, then do nothing
        if not new:
            return

        index = new[-1]
        run_name = self.sources[source_name].data[x_name][index]

        self.master.go_to_inspect(run_name)

    # Wrappers for each plot (since new is the index of the clicked element,
    # it is dependent of the plot because we could have filtered some
    # outliers)
    # There doesn't seem to be an easy way to add custom parameters to a
    # Bokeh callback, so using wrappers seems to be the best solution for now

    def inspect_run_callback_boxplot(self, attr, old, new):
        """Tap callback of the boxplot."""
        self.inspect_run_callback(new, "boxplot_source", "x")

    def inspect_run_callback_sigma(self, attr, old, new):
        """Tap callback of the sigma plot."""
        self.inspect_run_callback(new, "sigma_source", "sigma_x")

    def inspect_run_callback_s2(self, attr, old, new):
        """Tap callback of the s2 plot."""
        self.inspect_run_callback(new, "s2_source", "s2_x")

    def inspect_run_callback_s10(self, attr, old, new):
        """Tap callback of the s10 plot."""
        self.inspect_run_callback(new, "s10_source", "s10_x")

    # Constructor

    def __init__(self, master, doc, data, metadata):
        """Set up the plots and widgets of the view and draw them once.

        master   -- object notified through go_to_inspect() when a run is
                    tapped
        doc      -- Bokeh document the plots and widgets are added to
        data     -- runs data, indexed by test, variable and vfc_backend
        metadata -- runs metadata, queried through helper.get_metadata()
        """
        self.master = master

        self.doc = doc
        self.data = data
        self.metadata = metadata

        self.sources = {
            "boxplot_source": ColumnDataSource(data={}),
            "sigma_source": ColumnDataSource(data={}),
            "s10_source": ColumnDataSource(data={}),
            "s2_source": ColumnDataSource(data={})
        }

        self.plots = {}
        self.widgets = {}

        # Setup Bokeh objects
        self.setup_plots()
        self.setup_widgets()

        # At this point, everything should have been initialized, so we can
        # show the plots for the first time
        self.update_plots()