In [1]:
from graph import *
from models import *
from metrics import *

import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import plotly.graph_objects as go

In [2]:
# Each spreadsheet defines one workflow configuration: its structure plus the
# parameters for data rates, efficiency, data reduction, and classifier performance.
run3_system, run5_system, run5_smartpx_system = (
    dataframes_from_spreadsheet(workbook)
    for workbook in (
        "cms_system_60.xlsx",
        "cms_system_200.xlsx",
        "cms_system_200_smartpx.xlsx",
    )
)

In [3]:
run3_system.detectors

Unnamed: 0,Category,Detector,Data (bytes),Sample Rate,Link Efficiency (J/bit),Op Efficiency (J/op),Compression
0,Tracking,Inner Tracker,436666.666667,40000000,2.22e-11,0,0
1,Tracking,Outer Tracker PS,206666.666667,40000000,2.22e-11,0,0
2,Tracking,Outer Tracker 2S,126666.666667,40000000,2.22e-11,0,0
3,Tracking,Track Finder TPG,10000.0,40000000,2.22e-11,0,0
4,Timing,MIP Timing BTL,76666.666667,40000000,2.22e-11,0,0
5,Timing,MIP Timing ETL,136666.666667,40000000,2.22e-11,0,0
6,Calorimetry,ECAL Barrel,180000.0,40000000,2.22e-11,0,0
7,Calorimetry,HCAL Barrel,240000.0,40000000,2.22e-11,0,0
8,Calorimetry,HCAL HO,30000.0,40000000,2.22e-11,0,0
9,Calorimetry,HCAL HF,60000.0,40000000,2.22e-11,0,0


In [4]:
run3_system.triggers

Unnamed: 0,Name,Output,Data (bytes),Reduction Ratio,Classifier,Skill mean,Skill variance,Link Efficiency (J/bit),Op Efficiency (J/op),Compression
0,Tracking,Intermediate,0,1,Dummy,0,0,2.5e-11,0.0,0
1,Timing,Intermediate,0,1,Dummy,0,0,2.5e-11,0.0,0
2,Calorimetry,Intermediate,0,1,Dummy,0,0,2.5e-11,0.0,0
3,Muon,Intermediate,0,1,Gaussian,0,0,2.5e-11,0.0,0
4,Intermediate,Global,260000,400,L1T,0,0,2.5e-11,0.003,0
5,Global,Disk,0,100,Gaussian,4,1,2.5e-11,16.0,0
6,Disk,,0,1,Dummy,0,0,2.5e-11,0.0,0


In [5]:
run5_system.detectors

Unnamed: 0,Category,Detector,Data (bytes),Sample Rate,Compression,Link Efficiency (J/bit),Op Efficiency (J/op),PU 200
0,Tracking,Inner Tracker,1440000,40000000,0,2.22e-11,0,1.44
1,Tracking,Outer Tracker PS,720000,40000000,0,2.22e-11,0,0.72
2,Tracking,Outer Tracker 2S,430000,40000000,0,2.22e-11,0,0.43
3,Tracking,Track Finder TPG,10000,40000000,0,2.22e-11,0,0.01
4,Timing,MIP Timing BTL,240000,40000000,0,2.22e-11,0,0.24
5,Timing,MIP Timing ETL,440000,40000000,0,2.22e-11,0,0.44
6,Calorimetry,ECAL Barrel,600000,40000000,0,2.22e-11,0,0.6
7,Calorimetry,HCAL Barrel,240000,40000000,0,2.22e-11,0,0.24
8,Calorimetry,HCAL HO,30000,40000000,0,2.22e-11,0,0.03
9,Calorimetry,HCAL HF,60000,40000000,0,2.22e-11,0,0.06


In [6]:
# Show the detector row at position 9 directly; there is no need to materialise
# every row into a list just to pick one of them.
run5_system.detectors.iloc[9]

Category                   Calorimetry
Detector                       HCAL HF
Data (bytes)                     60000
Sample Rate                   40000000
Compression                          0
Link Efficiency (J/bit)            0.0
Op Efficiency (J/op)                 0
PU 200                            0.06
Name: 9, dtype: object

In [7]:
# Load the data describing how wall time scales with pileup.
scaling = pd.read_excel("wall time scaling.xlsx", sheet_name="Data")


def fit_poly(x, k3, k2, k1):
    """Cubic polynomial with no constant term: k3*x^3 + k2*x^2 + k1*x."""
    return k3 * x ** 3 + k2 * x ** 2 + k1 * x


# Fit the polynomial to the (Size, Wall Time) measurements. `k` holds the fitted
# coefficients in the order (k3, k2, k1); `cv` is the covariance estimate that
# curve_fit returns alongside them.
k, cv = curve_fit(fit_poly, scaling["Size"], scaling["Wall Time"])

In [8]:
def _global_runtime(x):
    """Scaling for the "Global" trigger: the fitted wall-time polynomial evaluated at x."""
    return fit_poly(x, *k)


def _intermediate_runtime(x):
    """Scaling for the "Intermediate" trigger: linear in x."""
    return x / 2.0e6


# Maps a trigger name to the function describing how its runtime scales with incoming data.
funcs = {
    "Global": _global_runtime,
    "Intermediate": _intermediate_runtime,
}

In [9]:
"""
Vary the accept rate of the level 1 trigger and inspect its impact on performance and resources required
"""
def init_system(functions, l1t_reduction: float, pileup_interp: float):
    """
    Build the workflow graph for a given L1T reduction ratio and pileup level.

    The detector table is taken from the Run-3 system, with each detector's
    "Data (bytes)" blended linearly towards the Run-5 value. The trigger table is
    the Run-3 one with the reduction ratio of the "Intermediate" stage overridden.

    Parameters
    ----------
    functions
        Passed through unchanged to ``construct_graph``.
    l1t_reduction : float
        Reduction ratio to assign to the trigger row named "Intermediate".
    pileup_interp : float
        Blend factor: 0.0 keeps the Run-3 data sizes, 1.0 uses the Run-5 ones.

    Returns
    -------
    The graph produced by ``construct_graph``.

    Raises
    ------
    ValueError
        If the trigger table has no row named "Intermediate".
    """
    detectors = run3_system.detectors.copy()

    # The two tables are combined by position, so they must list the same
    # detectors in the same order.
    run3_sizes = detectors["Data (bytes)"].values
    run5_sizes = run5_system.detectors["Data (bytes)"].values
    detectors["Data (bytes)"] = (1 - pileup_interp) * run3_sizes + pileup_interp * run5_sizes

    triggers = run3_system.triggers.copy()

    # Locate the intermediate reduction stage by name rather than by a hard-coded
    # row label, so reordering the spreadsheet cannot silently change another stage.
    is_intermediate = triggers["Name"] == "Intermediate"
    if not is_intermediate.any():
        raise ValueError('trigger table has no row named "Intermediate"')

    # Cast first so a fractional ratio (e.g. 53.3) can be stored without relying on
    # pandas implicitly upcasting an integer column during assignment.
    triggers["Reduction Ratio"] = triggers["Reduction Ratio"].astype(float)
    triggers.loc[is_intermediate, "Reduction Ratio"] = l1t_reduction

    return construct_graph(detectors, triggers, run3_system.globals, functions)

In [10]:
# Baseline graph: L1T reduction ratio of 400 and no pileup interpolation,
# i.e. the unblended Run-3 data sizes.
ex_baseline = init_system(funcs, l1t_reduction=400, pileup_interp=0.0)

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  fit = lambda l: np.abs(self.egamma_rate - quad(lambda x: self.exp_dist(x, l) * interpolator(x), np.min(xs), np.max(xs))[0])


In [11]:
ex_baseline

<networkx.classes.digraph.DiGraph at 0x177b33280>

In [12]:
from copy import deepcopy

In [13]:
def extract_results(graph):
    """
    Read the headline results from a graph's graph-level attributes.

    Returns a ``(power, confusion)`` tuple, where ``power`` is the sum of the
    "op power" and "link power" attributes and ``confusion`` is the
    "performance" attribute.
    """
    summary = graph.graph
    total_power = summary["op power"] + summary["link power"]
    return total_power, summary["performance"]

In [14]:
ex_baseline.nodes["Inner Tracker"]

{'sample data': 436666.6666666666,
 'sample rate': 40000000,
 'type': 'detector',
 'op efficiency': 0,
 'classifier': <classifier.DummyClassifier at 0x177baed10>,
 'error matrix': array([[0., 0.],
        [1., 1.]]),
 'reduction ratio': 1.0,
 'reduction': 0.0,
 'data reduction': 1.0,
 'complexity': <function graph.detectors.<locals>.<lambda>(x)>,
 'global ratio': 40000.0,
 'message size': 436666.6666666666,
 'ops': 436666.6666666666,
 'contingency': array([[       0,        0],
        [39999000,     1000]]),
 'input rate': 40000000,
 'output rate': 40000000,
 'discards': array([0, 0]),
 'energy': 0.0,
 'power': 0.0}

In [15]:
def vary_pileup(graph, interp: float):
    """
    Set each detector node's "sample data" to a pileup-interpolated data size.

    For every row of the Run-3 detector table, the node named by its "Detector"
    column receives a linear blend of the Run-3 and Run-5 "Data (bytes)" values
    (``interp`` = 0 gives Run-3, 1 gives Run-5). Rows of the two tables are
    matched by position. The graph is modified in place and also returned.
    """
    run3_rows = run3_system.detectors
    run5_rows = run5_system.detectors

    for pos in range(len(run3_rows)):
        run3_row = run3_rows.iloc[pos]
        run5_bytes = run5_rows.iloc[pos]["Data (bytes)"]
        graph.nodes[run3_row["Detector"]]["sample data"] = (
            (1 - interp) * run3_row["Data (bytes)"] + interp * run5_bytes
        )

    return graph

In [16]:
def vary_system(graph, reduction_ratio: float, interp):
    """
    Evaluate a copy of ``graph`` with a new L1T reduction ratio and pileup level.

    The input graph is deep-copied, so it is never modified. On the copy, the
    "Intermediate" node's "reduction ratio" is overridden, detector data sizes
    are interpolated with ``vary_pileup``, and ``update_throughput`` recomputes
    the graph before the results are read.

    Parameters
    ----------
    graph
        Baseline graph to copy.
    reduction_ratio : float
        Reduction ratio to apply to the "Intermediate" node.
    interp
        Pileup interpolation factor forwarded to ``vary_pileup``.

    Returns
    -------
    tuple
        ``(power, confusion, graph)``. The updated graph is returned as the third
        element because downstream cells read it as ``result[2].graph[...]``; a
        two-element result would make them fail with an IndexError.
    """
    graph = deepcopy(graph)
    graph.nodes["Intermediate"]["reduction ratio"] = reduction_ratio
    graph = vary_pileup(graph, interp)

    graph = update_throughput(graph)

    power, confusion = extract_results(graph)

    return power, confusion, graph

In [72]:
baseline = vary_system(ex_baseline, 400, 0.0)

In [73]:
baseline

(2095046.1633635233,
 array([[39998253,      742],
        [     745,      256]]),
 <networkx.classes.digraph.DiGraph at 0x32031a920>)

In [74]:
run5 = vary_system(ex_baseline, 53.3, 1.0)

In [75]:
run5

(335455256.3411208,
 array([[39988327,     4162],
        [    4166,     3340]]),
 <networkx.classes.digraph.DiGraph at 0x30ff5a7a0>)

In [76]:
# Sweep the L1T reduction ratio from 450 down to 40 (today's accept rate towards the
# planned Run-5 one) and the pileup interpolation factor from 0 to 1, each in 101
# evenly spaced steps.
l1t_reductions = np.linspace(start=450, stop=40, num=101)
pileup = np.linspace(start=0, stop=1.0, num=101)

In [77]:
# Evaluate the system on the full grid: res[i][j] is the result for pileup[i]
# and l1t_reductions[j].
res = [
    [vary_system(ex_baseline, ratio, interp) for ratio in l1t_reductions]
    for interp in pileup
]

In [40]:
def extract_metrics(results):
    """
    Turn a sequence of system results into arrays of summary metrics.

    Each entry of ``results`` is indexed as ``entry[0]`` (power), ``entry[1]``
    (confusion matrix) and ``entry[2]`` (graph).

    Returns a dict with the keys "confusion", "power", "recall", "precision",
    "f1 score" and "productivity". Each value is an array with one entry per result.
    """
    all_confusion = np.array([entry[1] for entry in results])

    # Each power is divided by density_scale_model evaluated on the first "Year"
    # entry of the graph's "globals" attribute.
    all_power = np.array(
        [
            entry[0] / density_scale_model(entry[2].graph["globals"]["Year"][0])
            for entry in results
        ]
    )

    def per_matrix(metric):
        # Apply `metric` to every confusion matrix along the leading axis.
        n_results = all_confusion.shape[0]
        return np.array([metric(all_confusion[idx, :, :]) for idx in range(n_results)])

    all_recall = per_matrix(recall)
    all_precision = per_matrix(precision)
    all_f1 = per_matrix(f1_score)
    passed_totals = per_matrix(lambda matrix: np.sum(get_passed(matrix)))

    return {
        "confusion": all_confusion,
        "power": all_power,
        "recall": all_recall,
        "precision": all_precision,
        "f1 score": all_f1,
        # Recall multiplied by the summed output of get_passed.
        "productivity": all_recall * passed_totals,
    }

In [41]:
run5_metrics = [extract_metrics(r) for r in res]

In [42]:
res_f1 = np.stack([r["f1 score"] for r in run5_metrics])

In [43]:
res_recall = np.stack([r["recall"] for r in run5_metrics])

In [44]:
res_precision = np.stack([r["precision"] for r in run5_metrics])

In [45]:
power = np.stack([r["power"] for r in run5_metrics])

In [46]:
from scipy.ndimage import gaussian_filter

In [47]:
smoothed_f1 = gaussian_filter(res_f1, sigma=3)

In [48]:
np.savez_compressed("smoothed_f1.npz", smoothed_f1)

In [68]:
# Classifier attached to the "Intermediate" (L1T) node of the baseline graph.
# NOTE(review): this cell originally read `ex.nodes[...]`, but no `ex` is defined in
# any visible cell; `ex_baseline` is the only graph the notebook constructs. Confirm
# that the baseline is the system this histogram is meant to show.
c = ex_baseline.nodes["Intermediate"]["classifier"]

In [75]:
# Histogram of the classifier's scores: `null_scores` labelled "False" and
# `pos_scores` labelled "True".
fig = go.Figure()
fig.add_trace(go.Histogram(x=c.null_scores, name="False"))
fig.add_trace(go.Histogram(x=c.pos_scores, name="True"))
fig.update_layout(
    title="Histogram of L1T Classifier Model Scores",
    width=800,
    height=600,
)
fig.show()

In [49]:
# F1 score of each system, computed from element 1 of every entry in `all_systems`.
# NOTE(review): `all_systems` is not defined in any cell visible in this notebook;
# confirm where it is built, otherwise this cell fails on a fresh kernel.
systems_f1 = np.array([f1_score(s[1]) for s in all_systems])

In [50]:
# Power of each system (element 0), divided by density_scale_model evaluated on the
# first "Year" entry of the "globals" attribute of the system's graph (element 2).
# NOTE(review): `all_systems` is not defined in any cell visible in this notebook.
systems_power = np.array([s[0] / density_scale_model(s[2].graph["globals"]["Year"][0]) for s in all_systems])

In [51]:
systems_power / 1e6

array([ 6.98981939,  6.93924151, 25.8554917 , 25.81942824, 20.4384212 ])

In [52]:
# One entry per system. The plotting cells label positions 0-4 as "Phase-1",
# "L1T Tracking", "Increased L1T Accept", "Phase-2" and "Data Reduction".
# L1T reduction ratio of each system:
systems_reductions = np.array([400, 400, 53.3, 53.3, 53.3])
# L1T skill improvement of each system:
l1t_improvement = np.array([0.0, 0.4, 0.0, 0.4, 0.4])

In [82]:
# Contour map of the smoothed F1 score, with individual systems overlaid as markers.
# NOTE(review): `l1t_skills` is not defined in any cell visible in this notebook;
# confirm which array is meant to label the rows of `smoothed_f1`.
fig = go.Figure(
    data=go.Contour(
        z=smoothed_f1,
        x=l1t_reductions,  # horizontal axis
        y=l1t_skills,  # vertical axis
        contours=dict(showlabels=True),
        colorbar=dict(title="F1 Score"),
    ),
)

y_offset = 0.015

# (position in the system arrays, marker symbol, vertical nudge or None, legend label)
# Only the systems with zero skill improvement are nudged upwards by y_offset.
_f1_marker_specs = [
    (0, "circle", y_offset, "Phase-1"),
    (1, "square", None, "L1T Tracking"),
    (2, "cross", y_offset, "Increased L1T Accept"),
    (3, "star", None, "Phase-2 & Data Reduction"),
]
for _pos, _symbol, _nudge, _label in _f1_marker_specs:
    _marker_y = l1t_improvement[_pos:_pos + 1]
    if _nudge is not None:
        _marker_y = _marker_y + _nudge
    fig.add_trace(
        go.Scatter(
            x=systems_reductions[_pos:_pos + 1],
            y=_marker_y,
            mode="markers",
            marker=dict(size=14, color="gray", symbol=_symbol),
            name=_label,
        )
    )

fig.update_layout(
    title="F1 Score by L1T Skill & Reduction Ratio",
    xaxis_title="L1T Reduction Ratio",
    yaxis_title="L1T Skill Improvement",
    width=800,
    height=600,
    legend=dict(xanchor="right", x=0.95),
)
# Larger reduction ratios are drawn on the left.
fig.update_xaxes(autorange="reversed")
fig.update_yaxes(range=[0.0, 0.8])
fig.show()

In [54]:
# Heatmap-coloured contour map of power over the reduction-ratio grid.
# NOTE(review): `l1t_skills` is not defined in any cell visible in this notebook;
# confirm which array is meant to label the rows of `power`.
fig = go.Figure(data =
    go.Contour(
        z=power,
        x=l1t_reductions, # horizontal axis
        y=l1t_skills, # vertical axis,
         contours_coloring='heatmap',
    ),
    )
# Axis titles use the nested `title=dict(text=..., font=...)` form; the flat
# `titlefont` attribute is deprecated and rejected by recent plotly releases.
# NOTE(review): 'Arial, bold' is a font-family list, so it may not actually render
# the titles in bold; confirm the intended styling.
fig.update_layout(width = 800,
                  height = 600,
                  title = "Power by Trigger Skill & Reduction Ratio",
                  xaxis=dict(
                        title=dict(
                            text="Reduction Ratio",
                            font=dict(size=24, family='Arial, bold'),
                        ),
                    ),
                    yaxis=dict(
                        title=dict(
                            text="Skill",
                            font=dict(size=24, family='Arial, bold'),
                        ),
                    ),
                    font = dict(size=18,),)
# Larger reduction ratios are drawn on the left.
fig.update_xaxes(autorange="reversed")
fig.show()

In [55]:
# Because the L1T's rejection is so much higher than the HLT's, more potential improvement
# is gained by raising the L1T's skill than by simply passing more data on to the HLT.

In [56]:
# Smoothed F1 score with the first row of `power` on the horizontal axis.
# NOTE(review): `l1t_skills` is not defined in any cell visible in this notebook.
_f1_by_power = go.Contour(
    z=smoothed_f1,
    x=power[0, :],  # horizontal axis
    y=l1t_skills,  # vertical axis
    contours=dict(showlabels=True),
    contours_coloring='heatmap',
)
fig = go.Figure(data=_f1_by_power)
fig.update_layout(
    title="F1 Score by Skill Improvement and Power",
    xaxis_title="Power",
    yaxis_title="L1T Skill Improvement",
    width=800,
    height=600,
)
fig.update_yaxes(range=(0.0, 0.8))
fig.show()

In [57]:
# Same F1 surface with the axes swapped (hence the transpose), plus one marker per system.
# NOTE(review): `l1t_skills` is not defined in any cell visible in this notebook.
fig = go.Figure(
    data=go.Contour(
        z=np.transpose(smoothed_f1),
        y=power[0, :],  # vertical axis
        x=l1t_skills,  # horizontal axis
        contours=dict(showlabels=True),
        contours_coloring='heatmap',
    ),
)

# (slice into the system arrays, marker colour, legend label)
_power_marker_specs = [
    (slice(0, 1), "gray", "Phase-1"),
    (slice(1, 2), "red", "L1T Tracking"),
    (slice(2, 3), "blue", "Increased L1T Accept"),
    (slice(3, 4), "purple", "Phase-2"),
    (slice(4, None), "green", "Data Reduction"),
]
for _span, _color, _label in _power_marker_specs:
    fig.add_trace(
        go.Scatter(
            y=systems_power[_span],
            x=l1t_improvement[_span],
            mode="markers",
            marker=dict(size=14, color=_color),
            name=_label,
        )
    )

fig.update_layout(
    title="F1 Score by Skill Improvement and Power",
    yaxis_title="Power",
    xaxis_title="L1T Skill Improvement",
    width=800,
    height=600,
    legend=dict(xanchor="right", x=0.95),
)
fig.update_xaxes(range=(0.0, 0.7))
fig.show()

In [58]:
output_rate = np.array([1e3, 1e3, 7.5e3, 7.5e3, 7.5e3])

In [59]:
output_rate

array([1000., 1000., 7500., 7500., 7500.])

In [60]:
productivity = (systems_f1 * output_rate) / systems_power

In [61]:
systems_f1

array([0.27195957, 0.49095642, 0.44882729, 0.84024537, 0.83963459])

In [62]:
productivity

array([3.89079541e-05, 7.07507321e-05, 1.30193026e-04, 2.44073578e-04,
       3.08108899e-04])

In [63]:
# Bar chart of productivity, one bar per system, in the same order as the system arrays.
_system_names = [
    "Phase-1",
    "L1T Tracking",
    "Increased L1T Accept",
    "Phase-2",
    "Data Reduction",
]
fig = go.Figure(data=go.Bar(x=_system_names, y=productivity))

fig.update_layout(
    title="Productivity by System",
    xaxis_title="System",
    yaxis_title="Productivity (Relevant Samples per Joule)",
    width=800,
    height=600,
)

fig.show()