# Manage the view comparing the variables of a run

from math import pi
from functools import partial

import pandas as pd
import numpy as np

from bokeh.plotting import figure, curdoc
from bokeh.embed import components
from bokeh.models import Select, ColumnDataSource, Panel, Tabs, HoverTool,\
    RadioButtonGroup, CheckboxGroup, CustomJS

import helper
import plot

##########################################################################


class InspectRuns:
    """View used to inspect the variables of a single run.

    The view owns four plots (mu, sigma, s10, s2), their ColumnDataSources
    and the widgets used to pick a run, a "group by" factor, a "filter by"
    factor and a filter value. All Bokeh objects are added as roots of the
    document passed to the constructor.
    """

    # Helper functions related to InspectRun

    def gen_runs_selection(self):
        """Return a dict mapping each run's display string to its timestamp.

        The display string is read from the "name" column of self.metadata
        and the timestamp from its index. If several runs share the same
        display string, the last one wins.
        """
        # Be careful not to confuse the column called "name" (display string
        # of the run) with the index of metadata (timestamp of the run)
        return dict(zip(self.metadata["name"], self.metadata.index))

    def gen_boxplot_tooltips(self, prefix):
        """Return the list of (label, field) tooltips of a boxplot.

        prefix is the name of the plotted quantity ("sigma", "s10" or "s2"),
        used as prefix of the data source's column names.
        """
        return [
            ("Name", "@%s_x" % prefix),
            ("Min", "@" + prefix + "_min{%0.18e}"),
            ("Max", "@" + prefix + "_max{%0.18e}"),
            ("1st quartile", "@" + prefix + "_quantile25{%0.18e}"),
            ("Median", "@" + prefix + "_quantile50{%0.18e}"),
            ("3rd quartile", "@" + prefix + "_quantile75{%0.18e}"),
            ("μ", "@" + prefix + "_mu{%0.18e}"),
            ("Number of samples (tests)", "@nsamples")
        ]

    def gen_boxplot_tooltips_formatters(self, prefix):
        """Return the dict of tooltips formatters matching the fields
        generated by gen_boxplot_tooltips() for the same prefix.
        """
        suffixes = (
            "min", "max", "quantile25", "quantile50", "quantile75", "mu"
        )
        return {
            "@%s_%s" % (prefix, suffix): "printf" for suffix in suffixes
        }

    # Data processing helper
    # (computes new distributions for sigma, s2, s10)

    def data_processing(self, dataframe):
        """Turn the aggregated lists of dataframe into plottable columns.

        dataframe is expected to be indexed by strings and to have the
        columns "mu", "nsamples", "sigma", "s10" and "s2", each cell being a
        list of values. It is modified in place and also returned:
        - "mu" becomes the average of the group's mu weighted by nsamples
        - "nsamples" becomes the number of aggregated elements
        - "mu_x" and "<prefix>_x" hold the (possibly shortened) index
        - "sigma", "s10" and "s2" are replaced by their min, max, quartiles
          and average ("<prefix>_min", "<prefix>_quantile25", ...)
        """

        # Compute aggregated mu (average of the group weighted by nsamples).
        # A plain loop also works when the dataframe has no rows.
        dataframe["mu"] = [
            np.average(mu, weights=nsamples)
            for mu, nsamples in zip(dataframe["mu"], dataframe["nsamples"])
        ]

        # nsamples is the number of aggregated elements (as well as the
        # number of samples for our new sigma and s distributions)
        dataframe["nsamples"] = dataframe["nsamples"].apply(len)

        dataframe["mu_x"] = dataframe.index
        # Make sure that strings don't exceed a certain length
        dataframe["mu_x"] = dataframe["mu_x"].apply(
            lambda x: x[:17] + "[...]" + x[-17:] if len(x) > 39 else x
        )

        # Get quantiles and mu for sigma, s10, s2
        for prefix in ["sigma", "s10", "s2"]:

            dataframe["%s_x" % prefix] = dataframe["mu_x"]

            dataframe[prefix] = dataframe[prefix].apply(np.sort)

            dataframe["%s_min" % prefix] = dataframe[prefix].apply(np.min)
            for percent in (25, 50, 75):
                dataframe["%s_quantile%d" % (prefix, percent)] = \
                    dataframe[prefix].apply(
                        np.quantile, args=(percent / 100,))
            dataframe["%s_max" % prefix] = dataframe[prefix].apply(np.max)
            dataframe["%s_mu" % prefix] = dataframe[prefix].apply(np.average)

            # The raw distribution is not needed anymore
            del dataframe[prefix]

        return dataframe

    # Plots update function

    def update_plots(self):
        """Recompute the data of all plots from the current widgets' state
        and update the data sources, the titles and the x ranges.
        """

        groupby_radio = self.widgets["groupby_radio"]
        groupby_display = groupby_radio.labels[groupby_radio.active]
        groupby = self.factors_dict[groupby_display]

        filterby_radio = self.widgets["filterby_radio"]
        filterby_display = filterby_radio.labels[filterby_radio.active]
        filterby = self.factors_dict[filterby_display]

        # Keep only the rows matching the selected filter value ...
        selected = self.run_data[
            self.run_data.index.isin(
                [self.widgets["select_filter"].value],
                level=filterby
            )
        ]

        # ... then groupby and aggregate lines belonging to the same group
        # in lists
        def to_list(column):
            return column.tolist()

        groups = selected.groupby(groupby).agg({
            "sigma": to_list,
            "s10": to_list,
            "s2": to_list,
            "mu": to_list,
            # Used for mu weighted average first, then will be replaced
            "nsamples": to_list
        })

        # Compute the new distributions, ...
        groups = self.data_processing(groups).to_dict("list")

        filter_outliers = \
            len(self.widgets["outliers_filtering_inspect"].active) > 0

        # Update source

        # Assign each ColumnDataSource, starting with the boxplots
        boxplot_suffixes = (
            "x", "min", "quantile25", "quantile50", "quantile75", "max", "mu"
        )
        for prefix in ["sigma", "s10", "s2"]:

            source_data = {}
            for suffix in boxplot_suffixes:
                column = "%s_%s" % (prefix, suffix)
                source_data[column] = groups[column]
            source_data["nsamples"] = groups["nsamples"]

            # Filter outliers if the box is checked
            if filter_outliers:

                # Boxplots will be filtered by max then min
                top_outliers = helper.detect_outliers(
                    source_data["%s_max" % prefix])
                helper.remove_boxplot_outliers(
                    source_data, top_outliers, prefix)

                bottom_outliers = helper.detect_outliers(
                    source_data["%s_min" % prefix])
                helper.remove_boxplot_outliers(
                    source_data, bottom_outliers, prefix)

            self.sources["%s_source" % prefix].data = source_data

        # Finish with the mu plot
        mu_x = groups["mu_x"]
        mu = groups["mu"]
        mu_nsamples = groups["nsamples"]

        # Filter outliers if the box is checked. This has to be done before
        # assigning the data source, and on every column, so that the source
        # actually receives the filtered data with columns of equal lengths.
        if filter_outliers:
            mu_outliers = helper.detect_outliers(mu)
            mu = helper.remove_outliers(mu, mu_outliers)
            mu_x = helper.remove_outliers(mu_x, mu_outliers)
            mu_nsamples = helper.remove_outliers(mu_nsamples, mu_outliers)

        self.sources["mu_source"].data = {
            "mu_x": mu_x,
            "mu": mu,
            "nsamples": mu_nsamples
        }

        # Update plots axis/titles

        # Get display string of the last (unselected) factor
        unselected = [
            display for display in self.factors_dict
            if display not in (groupby_display, filterby_display)
        ]
        for_all = unselected[0]

        # Update all display strings for plot title (remove caps, plural)
        title_details = (
            filterby_display.lower()[:-1],
            groupby_display.lower(),
            for_all.lower()
        )

        titles = {
            "mu_inspect":
                "Empirical average μ of %s (groupped by %s, for all %s)",
            "sigma_inspect":
                "Standard deviation σ of %s (groupped by %s, for all %s)",
            "s10_inspect":
                "Significant digits s of %s (groupped by %s, for all %s)",
            "s2_inspect":
                "Significant digits s of %s (groupped by %s, for all %s)"
        }
        for plot_name, title in titles.items():
            self.plots[plot_name].title.text = title % title_details

        helper.reset_x_range(self.plots["mu_inspect"], mu_x)
        helper.reset_x_range(self.plots["sigma_inspect"], groups["sigma_x"])
        helper.reset_x_range(self.plots["s10_inspect"], groups["s10_x"])
        helper.reset_x_range(self.plots["s2_inspect"], groups["s2_x"])

    # Widgets' callback functions

    def _refresh_filter_options(self):
        """Rebuild the options of the filter selector from the current run
        data and "Filter by" factor, then make sure the plots get updated.
        """

        filterby_radio = self.widgets["filterby_radio"]
        filterby = self.factors_dict[
            filterby_radio.labels[filterby_radio.active]
        ]

        select_filter = self.widgets["select_filter"]

        # Save old selected option
        old_value = select_filter.value

        # Update filter options
        options = self.run_data.index\
            .get_level_values(filterby).drop_duplicates().tolist()
        select_filter.options = options

        if old_value not in select_filter.options:
            # The update_filter callback will be triggered by the assignment
            select_filter.value = options[0]
        else:
            # Trigger the callback manually (since the plots need to be
            # updated anyway)
            self.update_filter("", "", old_value)

    # Run selector callback
    def update_run(self, attrname, old, new):
        """Switch to the run whose display string is new."""

        # Update run selection (by using dict mapping)
        self.current_run = self.runs_dict[new]

        # Update run data
        self.run_data = self.data[self.data["timestamp"] == self.current_run]

        self._refresh_filter_options()

    # "Group by" radio
    def update_groupby(self, attrname, old, new):
        """Update the "Filter by" radio (which lists every factor except the
        one selected in "Group by"), then the filter selector.
        """

        filterby_list = list(self.factors_dict.keys())
        del filterby_list[self.widgets["groupby_radio"].active]
        self.widgets["filterby_radio"].labels = filterby_list

        self._refresh_filter_options()

    # "Filter by" radio
    def update_filterby(self, attrname, old, new):
        """Update the filter selector after a change of "Filter by"."""
        self._refresh_filter_options()

    # Filter selector callback
    def update_filter(self, attrname, old, new):
        """Update the plots after a change of the selected filter value."""
        self.update_plots()

    # Filter outliers checkbox callback
    def update_outliers_filtering(self, attrname, old, new):
        """Update the plots after the outliers checkbox has been toggled."""
        # The status (checked/unchecked) of the checkbox is also verified
        # inside self.update_plots(), so calling this function is enough
        self.update_plots()

    # Bokeh setup functions
    # (for both variable and backend selection at once)

    def _new_figure(self, name):
        """Return an empty figure with the settings shared by all plots."""
        return figure(
            name=name, title="",
            plot_width=900, plot_height=400, x_range=[""],
            tools="pan, wheel_zoom, xwheel_zoom, ywheel_zoom, reset, save",
            sizing_mode="scale_width"
        )

    def _new_boxplot(self, prefix):
        """Create, register in self.plots and return the boxplot of prefix
        ("sigma", "s10" or "s2").
        """
        name = "%s_inspect" % prefix
        self.plots[name] = self._new_figure(name)
        plot.fill_boxplot(
            self.plots[name], self.sources["%s_source" % prefix],
            prefix=prefix,
            tooltips=self.gen_boxplot_tooltips(prefix),
            tooltips_formatters=self.gen_boxplot_tooltips_formatters(prefix)
        )
        return self.plots[name]

    def setup_plots(self):
        """Create the four plots and add them to the document."""

        # Mu plot

        dotplot_tooltips = [
            ("Name", "@mu_x"),
            ("μ", "@mu{%0.18e}"),
            ("Number of samples (tests)", "@nsamples")
        ]
        dotplot_formatters = {
            "@mu": "printf"
        }

        self.plots["mu_inspect"] = self._new_figure("mu_inspect")
        plot.fill_dotplot(
            self.plots["mu_inspect"], self.sources["mu_source"], "mu",
            tooltips=dotplot_tooltips,
            tooltips_formatters=dotplot_formatters
        )
        self.doc.add_root(self.plots["mu_inspect"])

        # Sigma plot
        self.doc.add_root(self._new_boxplot("sigma"))

        # s plots (one tab per base)
        s10_tab_inspect = Panel(
            child=self._new_boxplot("s10"),
            title="Base 10")

        s2_tab_inspect = Panel(
            child=self._new_boxplot("s2"),
            title="Base 2")

        s_tabs_inspect = Tabs(
            name="s_tabs_inspect",
            tabs=[s10_tab_inspect, s2_tab_inspect], tabs_location="below"
        )
        self.doc.add_root(s_tabs_inspect)

    def setup_widgets(self):
        """Create the widgets, register their callbacks and add them to the
        document. Also initializes runs_dict, factors_dict, current_run and
        run_data (the last run of runs_dict is selected).
        """

        # Generation of selectable items

        # Dict contains all inspectable runs (maps display strings to
        # timestamps). The dict structure allows to get the timestamp from
        # the display string in O(1)
        self.runs_dict = self.gen_runs_selection()

        # Dict maps display strings to column names for the different factors
        # (var, backend, test)
        self.factors_dict = {
            "Variables": "variable",
            "Backends": "vfc_backend",
            "Tests": "test"
        }

        # Run selection

        # Contains all options strings
        runs_display = list(self.runs_dict.keys())
        # Contains the selected option string
        current_run_display = runs_display[-1]
        # Will be used when updating plots (contains actual timestamp)
        self.current_run = self.runs_dict[current_run_display]
        # This contains only entries matching the run
        self.run_data = self.data[self.data["timestamp"] == self.current_run]

        change_run_callback_js = "updateRunMetadata(cb_obj.value);"

        select_run = Select(
            name="select_run", title="Run :",
            value=current_run_display, options=runs_display
        )
        self.widgets["select_run"] = select_run
        self.doc.add_root(select_run)
        select_run.on_change("value", self.update_run)
        select_run.js_on_change("value", CustomJS(
            code=change_run_callback_js,
            args=dict(
                metadata=helper.metadata_to_dict(
                    helper.get_metadata(self.metadata, self.current_run)
                )
            )
        ))

        # Factors selection

        # "Group by" radio
        groupby_radio = RadioButtonGroup(
            name="groupby_radio",
            labels=list(self.factors_dict.keys()), active=0
        )
        self.widgets["groupby_radio"] = groupby_radio
        self.doc.add_root(groupby_radio)
        groupby_radio.on_change("active", self.update_groupby)

        # "Filter by" radio
        # Get all possible factors, and remove the one selected in "Group by"
        filterby_list = list(self.factors_dict.keys())
        del filterby_list[groupby_radio.active]

        filterby_radio = RadioButtonGroup(
            name="filterby_radio",
            labels=filterby_list, active=0
        )
        self.widgets["filterby_radio"] = filterby_radio
        self.doc.add_root(filterby_radio)
        filterby_radio.on_change("active", self.update_filterby)

        # Filter selector

        filterby = self.factors_dict[
            filterby_radio.labels[filterby_radio.active]
        ]

        options = self.run_data.index\
            .get_level_values(filterby).drop_duplicates().tolist()

        select_filter = Select(
            # We need a different name to avoid collision in the template
            # with the runs comparison's widget
            name="select_filter", title="Select a filter :",
            value=options[0], options=options
        )
        self.widgets["select_filter"] = select_filter
        self.doc.add_root(select_filter)
        select_filter.on_change("value", self.update_filter)

        # Toggle for outliers filtering

        outliers_filtering = CheckboxGroup(
            name="outliers_filtering_inspect",
            labels=["Filter outliers"], active=[]
        )
        self.widgets["outliers_filtering_inspect"] = outliers_filtering
        self.doc.add_root(outliers_filtering)
        outliers_filtering.on_change(
            "active", self.update_outliers_filtering)

    # Communication methods
    # (to send/receive messages to/from master)

    # When received, switch to the run_name in parameter
    def switch_view(self, run_name):
        """Select run_name in the run selector."""
        self.widgets["select_run"].value = run_name

    # Constructor

    def __init__(self, master, doc, data, metadata):
        """Build the view inside the Bokeh document doc.

        master is stored as is. data must have a "timestamp" column, and
        metadata must have a "name" column (see setup_widgets() and
        gen_runs_selection() for how they are used).
        """

        self.master = master

        self.doc = doc
        self.data = data
        self.metadata = metadata

        self.sources = {
            "mu_source": ColumnDataSource(data={}),
            "sigma_source": ColumnDataSource(data={}),
            "s10_source": ColumnDataSource(data={}),
            "s2_source": ColumnDataSource(data={})
        }

        self.plots = {}
        self.widgets = {}

        # Setup Bokeh objects
        self.setup_plots()
        self.setup_widgets()

        # Pass the timestamp of the initial run to the template
        self.doc.template_variables["initial_timestamp"] = self.current_run

        # At this point, everything should have been initialized, so we can
        # show the plots for the first time
        self.update_plots()