mirror of
https://github.com/TREX-CoE/Sherman-Morrison.git
synced 2025-01-13 06:28:35 +01:00
171 lines
4.7 KiB
Python
171 lines
4.7 KiB
Python
# General helper functions for both compare_runs and compare_variables
|
|
|
|
import calendar
|
|
import time
|
|
from itertools import compress
|
|
|
|
import numpy as np
|
|
|
|
# Magic numbers
|
|
max_ticks = 15
|
|
max_zscore = 3
|
|
|
|
##########################################################################
|
|
|
|
|
|
# From a timestamp, return the associated metadata as a Pandas serie
|
|
def get_metadata(metadata, timestamp):
|
|
return metadata.loc[timestamp]
|
|
|
|
|
|
# Convert a metadata Pandas series to a JS readable dict
|
|
def metadata_to_dict(metadata):
|
|
dict = metadata.to_dict()
|
|
|
|
# JS doesn't accept True for booleans, and Python doesn't accept true
|
|
# (because of the caps) => using an integer is a portable solution
|
|
dict["is_git_commit"] = 1 if dict["is_git_commit"] else 0
|
|
|
|
dict["date"] = time.ctime(metadata.name)
|
|
|
|
return dict
|
|
|
|
|
|
# Return a string that indicates the elapsed time since the run, used as the
|
|
# x-axis tick in "Compare runs" or when selecting run in "Inspect run"
|
|
def get_run_name(timestamp, hash):
|
|
|
|
gmt = time.gmtime()
|
|
now = calendar.timegm(gmt)
|
|
diff = now - timestamp
|
|
|
|
# Special case : < 1 minute (return string directly)
|
|
if diff < 60:
|
|
str = "Less than a minute ago"
|
|
|
|
if hash != "":
|
|
str = str + " (%s)" % hash
|
|
|
|
if str == get_run_name.previous:
|
|
get_run_name.counter = get_run_name.counter + 1
|
|
str = "%s (%s)" % (str, get_run_name.counter)
|
|
else:
|
|
get_run_name.counter = 0
|
|
get_run_name.previous = str
|
|
|
|
return str
|
|
|
|
# < 1 hour
|
|
if diff < 3600:
|
|
n = int(diff / 60)
|
|
str = "%s minute%s ago"
|
|
# < 1 day
|
|
elif diff < 86400:
|
|
n = int(diff / 3600)
|
|
str = "%s hour%s ago"
|
|
# < 1 week
|
|
elif diff < 604800:
|
|
n = int(diff / 86400)
|
|
str = "%s day%s ago"
|
|
# < 1 month
|
|
elif diff < 2592000:
|
|
n = int(diff / 604800)
|
|
str = "%s week%s ago"
|
|
# > 1 month
|
|
else:
|
|
n = diff / 2592000
|
|
str = "%s month%s ago"
|
|
|
|
plural = ""
|
|
if n != 1:
|
|
plural = "s"
|
|
|
|
str = str % (n, plural)
|
|
|
|
# We might want to add the git hash
|
|
if hash != "":
|
|
str = str + " (%s)" % hash
|
|
|
|
# Finally, check for duplicate with previously generated string
|
|
if str == get_run_name.previous:
|
|
# Increment the duplicate counter and add it to str
|
|
get_run_name.counter = get_run_name.counter + 1
|
|
str = "%s (%s)" % (str, get_run_name.counter)
|
|
|
|
else:
|
|
# No duplicate, reset both previously generated str and duplicate
|
|
# counter
|
|
get_run_name.counter = 0
|
|
get_run_name.previous = str
|
|
|
|
return str
|
|
|
|
|
|
# These external variables will store data about the last generated string to
|
|
# avoid duplicates (assuming the runs are sorted by time)
|
|
get_run_name.counter = 0
|
|
get_run_name.previous = ""
|
|
|
|
|
|
def reset_run_strings():
|
|
get_run_name.counter = 0
|
|
get_run_name.previous = ""
|
|
|
|
|
|
# Update all the x-ranges from a dict of plots
|
|
def reset_x_range(plot, x_range):
|
|
plot.x_range.factors = x_range
|
|
|
|
if len(x_range) < max_ticks:
|
|
plot.xaxis.major_tick_line_color = "#000000"
|
|
plot.xaxis.minor_tick_line_color = "#000000"
|
|
|
|
plot.xaxis.major_label_text_font_size = "8pt"
|
|
|
|
else:
|
|
plot.xaxis.major_tick_line_color = None
|
|
plot.xaxis.minor_tick_line_color = None
|
|
|
|
plot.xaxis.major_label_text_font_size = "0pt"
|
|
|
|
|
|
# Return an array of booleans that indicate which elements are outliers
|
|
# (True means element is not an outlier and must be kept)
|
|
def detect_outliers(array, max_zscore=max_zscore):
|
|
if len(array) <= 2:
|
|
return [True] * len(array)
|
|
|
|
median = np.median(array)
|
|
std = np.std(array)
|
|
if std == 0:
|
|
return array
|
|
distance = abs(array - median)
|
|
# Array of booleans with elements to be filtered
|
|
outliers_array = distance < max_zscore * std
|
|
|
|
return outliers_array
|
|
|
|
|
|
def remove_outliers(array, outliers):
|
|
return list(compress(array, outliers))
|
|
|
|
|
|
def remove_boxplot_outliers(dict, outliers, prefix):
|
|
outliers = detect_outliers(dict["%s_max" % prefix])
|
|
|
|
dict["%s_x" % prefix] = remove_outliers(dict["%s_x" % prefix], outliers)
|
|
|
|
dict["%s_min" % prefix] = remove_outliers(
|
|
dict["%s_min" % prefix], outliers)
|
|
dict["%s_quantile25" % prefix] = remove_outliers(
|
|
dict["%s_quantile25" % prefix], outliers)
|
|
dict["%s_quantile50" % prefix] = remove_outliers(
|
|
dict["%s_quantile50" % prefix], outliers)
|
|
dict["%s_quantile75" % prefix] = remove_outliers(
|
|
dict["%s_quantile75" % prefix], outliers)
|
|
dict["%s_max" % prefix] = remove_outliers(
|
|
dict["%s_max" % prefix], outliers)
|
|
dict["%s_mu" % prefix] = remove_outliers(dict["%s_mu" % prefix], outliers)
|
|
|
|
dict["nsamples"] = remove_outliers(dict["nsamples"], outliers)
|