# Sherman-Morrison/ci/vfc_ci_report/helper.py

# General helper functions for both compare_runs and compare_variables

import calendar
import time
from itertools import compress

import numpy as np

# Magic numbers

# reset_x_range only draws x-axis ticks and labels when the number of factors
# is strictly below this value.
max_ticks = 15
# Default threshold used by detect_outliers: an element is kept when its
# distance to the median is below max_zscore standard deviations.
max_zscore = 3

##########################################################################


def get_metadata(metadata, timestamp):
    """Return the entry of `metadata` whose index label is `timestamp`.

    `metadata` is looked up through its label-based `.loc` indexer, so a
    missing timestamp raises whatever that indexer raises.
    """
    run_metadata = metadata.loc[timestamp]
    return run_metadata


def metadata_to_dict(metadata):
    """Convert a metadata Pandas series into a JS-readable dict.

    The returned dict holds every entry of the series, with two adjustments:
    "is_git_commit" is stored as the integer 1 or 0, and a "date" entry is
    added, built by `time.ctime` from the series name.
    """
    result = metadata.to_dict()

    # JS spells booleans true/false while Python spells them True/False, so
    # an integer flag is used as a representation both sides can read.
    result["is_git_commit"] = int(bool(result["is_git_commit"]))

    # The series name is passed to time.ctime as the run timestamp
    result["date"] = time.ctime(metadata.name)

    return result


def get_run_name(timestamp, hash):
    """Return a label describing how long ago a run happened.

    The label is used as the x-axis tick in "Compare runs" and when selecting
    a run in "Inspect run".

    Parameters:
        timestamp: time of the run, in seconds since the epoch (compared with
            the current time obtained from `calendar.timegm(time.gmtime())`).
        hash: git hash appended in parentheses to the label; pass "" to omit
            it. (The name shadows the builtin but is part of the interface.)

    Returns:
        A string such as "Less than a minute ago", "5 minutes ago" or
        "1 month ago (abc123)". When the label equals the previously generated
        one, a duplicate counter is appended, e.g. "5 minutes ago (1)".

    Side effects:
        Reads and updates the function attributes `get_run_name.previous` and
        `get_run_name.counter`, which remember the last generated label.
    """
    # (exclusive upper bound on the elapsed time, unit length, unit name),
    # all expressed in seconds; anything beyond the last bound is in months.
    units = (
        (3600, 60, "minute"),
        (86400, 3600, "hour"),
        (604800, 86400, "day"),
        (2592000, 604800, "week"),
    )

    now = calendar.timegm(time.gmtime())
    diff = now - timestamp

    if diff < 60:
        # Special case: no count is displayed below one minute
        name = "Less than a minute ago"
    else:
        unit_seconds, unit_name = 2592000, "month"
        for limit, seconds, label in units:
            if diff < limit:
                unit_seconds, unit_name = seconds, label
                break

        # Always truncate to a whole number of units. The month branch
        # previously skipped this and produced labels such as
        # "3.0003858024691357 months ago".
        n = int(diff / unit_seconds)
        name = "%s %s%s ago" % (n, unit_name, "" if n == 1 else "s")

    # We might want to add the git hash
    if hash != "":
        name = name + " (%s)" % hash

    # Finally, check for a duplicate of the previously generated string
    if name == get_run_name.previous:
        # `previous` keeps the undecorated label so that every further
        # duplicate still matches it and receives the next counter value.
        get_run_name.counter = get_run_name.counter + 1
        name = "%s (%s)" % (name, get_run_name.counter)
    else:
        # No duplicate: remember this label and restart the counter
        get_run_name.counter = 0
        get_run_name.previous = name

    return name


# These function attributes store data about the last string generated by
# get_run_name so that duplicates can be told apart (assuming the runs are
# sorted by time):
#   - counter: number of duplicates of `previous` generated so far
#   - previous: last generated string, without any duplicate counter suffix
get_run_name.counter = 0
get_run_name.previous = ""


def reset_run_strings():
    """Reset the duplicate-tracking state kept on get_run_name.

    After this call the duplicate counter is 0 and no previously generated
    string is remembered.
    """
    get_run_name.counter, get_run_name.previous = 0, ""


def reset_x_range(plot, x_range):
    """Set the x-range factors of a plot and adapt its x-axis decorations.

    Ticks and labels are drawn only while the number of factors stays
    strictly below max_ticks; otherwise they are hidden (no tick line colour
    and a "0pt" label font size).
    """
    plot.x_range.factors = x_range

    ticks_visible = len(x_range) < max_ticks
    tick_color = "#000000" if ticks_visible else None
    label_size = "8pt" if ticks_visible else "0pt"

    plot.xaxis.major_tick_line_color = tick_color
    plot.xaxis.minor_tick_line_color = tick_color

    plot.xaxis.major_label_text_font_size = label_size


def detect_outliers(array, max_zscore=max_zscore):
    """Return a mask telling which elements of `array` are NOT outliers.

    An element is kept (True) when its absolute distance to the median is
    strictly below `max_zscore` times the standard deviation of the data.

    Parameters:
        array: sequence of numbers (list, NumPy array, ...).
        max_zscore: number of standard deviations beyond which an element is
            considered an outlier.

    Returns:
        A sequence of booleans with one entry per element of `array`: a list
        of True values when no filtering applies (two elements or fewer, or
        zero standard deviation), a NumPy boolean array otherwise.
    """
    if len(array) <= 2:
        return [True] * len(array)

    # Convert once so that plain lists support the element-wise arithmetic
    values = np.asarray(array)

    median = np.median(values)
    std = np.std(values)
    if std == 0:
        # All values are identical, so nothing is an outlier. A mask must be
        # returned here: returning the data itself would make callers filter
        # on the truthiness of the values and drop every zero.
        return [True] * len(array)

    # Array of booleans marking the elements to keep
    return np.abs(values - median) < max_zscore * std


def remove_outliers(array, outliers):
    """Return a list of the elements of `array` whose mask entry is truthy.

    `outliers` is a keep-mask aligned with `array`; iteration stops at the
    end of the shorter of the two.
    """
    return [element for element, keep in zip(array, outliers) if keep]


def remove_boxplot_outliers(dict, outliers, prefix):
    """Filter the boxplot series of `dict` in place, dropping outlier runs.

    The keep-mask is computed by detect_outliers from the "<prefix>_max"
    series and then applied to every "<prefix>_*" boxplot series as well as
    to "nsamples".

    NOTE: the `outliers` argument is ignored; it is overwritten by the mask
    computed here.
    """
    outliers = detect_outliers(dict["%s_max" % prefix])

    # Same keys, in the same order, as they have always been filtered
    templates = (
        "%s_x",
        "%s_min",
        "%s_quantile25",
        "%s_quantile50",
        "%s_quantile75",
        "%s_max",
        "%s_mu",
    )
    keys = [template % prefix for template in templates]
    keys.append("nsamples")

    for key in keys:
        dict[key] = remove_outliers(dict[key], outliers)