In [1]:
from systemflow.graph import *
from systemflow.models import *
from systemflow.metrics import *

import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import plotly.graph_objects as go

In [2]:
# Load the spreadsheets which define the structure of each workflow, as well as the
# parameters for data rates, efficiency, data reduction, and classifier performance.
# NOTE(review): the 60 / 200 in the file names presumably refer to pileup — confirm.
run3_system = dataframes_from_spreadsheet("cms_system_60.xlsx")
run5_system = dataframes_from_spreadsheet("cms_system_200.xlsx")
run5_smartpx_system = dataframes_from_spreadsheet("cms_system_200_smartpx.xlsx")

In [3]:
# Display the detector table loaded for the Run-5 system.
run5_system.detectors

Unnamed: 0,Category,Detector,Data (bytes),Sample Rate,Compression,Link Efficiency (J/bit),Op Efficiency (J/op),PU 200
0,Tracking,Inner Tracker,1440000,40000000,0,2.22e-11,0,1.44
1,Tracking,Outer Tracker PS,720000,40000000,0,2.22e-11,0,0.72
2,Tracking,Outer Tracker 2S,430000,40000000,0,2.22e-11,0,0.43
3,Tracking,Track Finder TPG,10000,40000000,0,2.22e-11,0,0.01
4,Timing,MIP Timing BTL,240000,40000000,0,2.22e-11,0,0.24
5,Timing,MIP Timing ETL,440000,40000000,0,2.22e-11,0,0.44
6,Calorimetry,ECAL Barrel,600000,40000000,0,2.22e-11,0,0.6
7,Calorimetry,HCAL Barrel,240000,40000000,0,2.22e-11,0,0.24
8,Calorimetry,HCAL HO,30000,40000000,0,2.22e-11,0,0.03
9,Calorimetry,HCAL HF,60000,40000000,0,2.22e-11,0,0.06


In [4]:
# Read the wall-time-versus-size measurements from the "Data" sheet.
scaling = pd.read_excel("wall time scaling.xlsx", sheet_name="Data")


def fit_poly(x, k3, k2, k1):
    """Cubic polynomial with no constant term: k3*x^3 + k2*x^2 + k1*x."""
    return k3 * x ** 3 + k2 * x ** 2 + k1 * x


# Fit the polynomial coefficients separately to the CPU ("Wall Time") and
# GPU ("Wall Time GPU") runtime columns.
k, cv = curve_fit(
    fit_poly,
    scaling["Size"],
    scaling["Wall Time"],
)
k_gpu, cv_gpu = curve_fit(
    fit_poly,
    scaling["Size"],
    scaling["Wall Time GPU"],
)

In [5]:
# Runtime-scaling functions keyed by trigger level; each maps incoming data size to a runtime.
# "Global" uses the fitted polynomial (CPU coefficients in `funcs`, GPU coefficients in
# `funcs_gpu`); "Intermediate" is the same linear scaling in both dictionaries.
def _intermediate_scaling(x):
    """Linear runtime scaling used for the intermediate trigger level."""
    return x / 2.0e6


funcs = {
    "Global": lambda x: fit_poly(x, *k),
    "Intermediate": _intermediate_scaling,
}
funcs_gpu = {
    "Global": lambda x: fit_poly(x, *k_gpu),
    "Intermediate": _intermediate_scaling,
}

In [8]:
# Build the Run-3 workflow graph from its detector, processor, and globals tables,
# using the CPU-fitted runtime scaling functions.
ex_run3 = construct_graph(run3_system.detectors, run3_system.processors, run3_system.globals, funcs)

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  fit = lambda l: np.abs(self.egamma_rate - quad(lambda x: self.exp_dist(x, l) * interpolator(x), np.min(xs), np.max(xs))[0])
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  trigger_rate = lambda l: quad(lambda x: exp_dist(x, l) * efficiency_fit(x), np.min(xs2), np.max(xs2))[0]


In [13]:
# Build the Run-5 workflow graph from its detector, processor, and globals tables,
# using the CPU-fitted runtime scaling functions.
ex_run5 = construct_graph(run5_system.detectors, run5_system.processors, run5_system.globals, funcs)

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  fit = lambda l: np.abs(self.egamma_rate - quad(lambda x: self.exp_dist(x, l) * interpolator(x), np.min(xs), np.max(xs))[0])
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  trigger_rate = lambda l: quad(lambda x: exp_dist(x, l) * efficiency_fit(x), np.min(xs2), np.max(xs2))[0]


In [14]:
# Same Run-5 tables as ex_run5, but with the GPU-fitted runtime scaling functions.
ex_run5_gpu = construct_graph(run5_system.detectors, run5_system.processors, run5_system.globals, funcs_gpu)

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  fit = lambda l: np.abs(self.egamma_rate - quad(lambda x: self.exp_dist(x, l) * interpolator(x), np.min(xs), np.max(xs))[0])
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  trigger_rate = lambda l: quad(lambda x: exp_dist(x, l) * efficiency_fit(x), np.min(xs2), np.max(xs2))[0]


In [15]:
from copy import deepcopy

In [16]:
# Copy the GPU configuration and override the skill boost of the "Intermediate" node's
# classifier; this variant is labelled "+L1 Tracks, GPU" in the plots further down.
# NOTE(review): no update_throughput call follows the override here; the sweeps re-run it
# via vary_system — confirm nothing reads this graph's cached results directly.
ex_run5_l1t_gpu = deepcopy(ex_run5_gpu)
ex_run5_l1t_gpu.nodes["Intermediate"]["classifier"].skill_boost = 0.40

In [17]:
def extract_results(graph):
    """Return ``(power, confusion)`` read from the graph-level attributes.

    ``power`` is the sum of the "op power" and "link power" entries of
    ``graph.graph``; ``confusion`` is its "performance" entry, returned as-is.
    """
    attrs = graph.graph
    total_power = attrs["op power"] + attrs["link power"]
    return total_power, attrs["performance"]

In [18]:
def vary_system(graph, it_reduction: float):
    """Re-evaluate a copy of ``graph`` with the Inner Tracker sample data rescaled.

    The input graph is deep-copied first, so the caller's graph is left untouched.

    Args:
        graph: Workflow graph with an "Inner Tracker" node carrying a "sample data" attribute.
        it_reduction: Multiplicative factor applied to the Inner Tracker "sample data".
            Despite the name this is the fraction that is *kept*: 1.0 leaves the data
            unchanged and 0.0 sets it to zero.

    Returns:
        Tuple ``(power, confusion, graph)``: the summed "op power" and "link power" of the
        updated copy, its "performance" entry, and the updated copy itself.
    """
    graph = deepcopy(graph)
    graph.nodes["Inner Tracker"]["sample data"] = it_reduction * graph.nodes["Inner Tracker"]["sample data"]

    # Recompute the graph's derived quantities for the rescaled input.
    graph = update_throughput(graph)

    # Reuse the shared helper rather than duplicating the power/confusion lookup.
    power, confusion = extract_results(graph)

    return power, confusion, graph

In [19]:
# Evaluate the Run-5 system with the Inner Tracker sample data scaled by 0.0.
# NOTE(review): a factor of 0.0 zeroes the Inner Tracker data entirely; if "baseline" is
# meant to be the unmodified system, the factor should be 1.0 — confirm intent.
baseline = vary_system(ex_run5, 0.0)

In [20]:
# Baseline power divided by 1e6 and by the density scaling factor for the first "Year"
# entry of the graph's globals.
# NOTE(review): presumably this reports density-normalised power in MW — confirm units.
baseline[0] / 1e6 / density_scale_model(ex_run5.graph["globals"]["Year"][0])

np.float64(32.91216194527279)

In [21]:
# Sweep the Inner Tracker data scale factor from 1.0 (no reduction) down to 0.40
# (60% reduction) in 101 evenly spaced steps; vary_system multiplies the sample data by it.
it_reductions = np.linspace(1.0, 0.40, 101)

In [22]:
# Evaluate the Run-5 (CPU scaling) system at every Inner Tracker scale factor.
res_r5 = [vary_system(ex_run5, r) for r in it_reductions]

In [23]:
# Evaluate the Run-5 GPU-scaling system at every Inner Tracker scale factor.
res_r5_gpu = [vary_system(ex_run5_gpu, r) for r in it_reductions]

In [24]:
# Evaluate the Run-5 GPU system with the boosted Intermediate classifier at every scale factor.
res_r5_l1t_gpu = [vary_system(ex_run5_l1t_gpu, r) for r in it_reductions]

In [25]:
def extract_metrics(results):
    """Collect per-run metrics from a sequence of ``(power, confusion, graph)`` results.

    Args:
        results: Sequence whose elements are indexed as ``[0]`` power, ``[1]`` confusion
            matrix, and ``[2]`` the graph the run was evaluated on.

    Returns:
        dict of arrays, one entry per element of ``results``, with keys "confusion",
        "power" (power divided by the density scaling for the graph's first "Year" entry),
        "recall", "precision", "f1 score", and "productivity" (recall multiplied by the
        summed ``get_passed`` output).
    """
    all_confusion = np.array([entry[1] for entry in results])

    # Normalise each run's power by the density scaling of its own graph's year.
    all_power = np.array([
        entry[0] / density_scale_model(entry[2].graph["globals"]["Year"][0])
        for entry in results
    ])

    n_runs = all_confusion.shape[0]

    def per_run(metric):
        """Apply ``metric`` to each run's confusion matrix and stack the results."""
        return np.array([metric(all_confusion[idx, :, :]) for idx in range(n_runs)])

    all_recall = per_run(recall)
    all_precision = per_run(precision)
    all_f1 = per_run(f1_score)
    passed_total = per_run(lambda matrix: np.sum(get_passed(matrix)))

    return {
        "confusion": all_confusion,
        "power": all_power,
        "recall": all_recall,
        "precision": all_precision,
        "f1 score": all_f1,
        "productivity": all_recall * passed_total,
    }

In [26]:
# Metrics for the Run-5 (CPU scaling) sweep.
run5_metrics = extract_metrics(res_r5)

In [27]:
# Metrics for the Run-5 GPU-scaling sweep.
run5_metrics_gpu = extract_metrics(res_r5_gpu)

In [28]:
# Metrics for the Run-5 GPU sweep with the boosted Intermediate classifier.
run5_metrics_l1t_gpu = extract_metrics(res_r5_l1t_gpu)

In [29]:
# Inspect the F1 score at each point of the sweep for the boosted-classifier variant.
# NOTE(review): this dumps the full 101-element array; a slice or summary would be lighter.
run5_metrics_l1t_gpu["f1 score"]

array([0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427239,
       0.79427239, 0.79427239, 0.79427239, 0.79427239, 0.79427

In [30]:
# Rate used to turn F1 score per unit power into the plotted productivity figure.
# NOTE(review): presumably the system output rate in samples per second (7.5 kHz) — confirm.
output_rate = 7.5e3

In [31]:
# Productivity plot: F1 score times output rate per unit power (scaled by 1000), against
# the percentage of Inner Tracker data removed. One trace per system configuration.
fig = go.Figure(data = [
    go.Scatter(x = (1 - it_reductions) * 100,
               y = series["f1 score"] * output_rate / series["power"] * 1000,
               name = label)
    for label, series in (("Baseline", run5_metrics),
                          ("+GPU", run5_metrics_gpu),
                          ("+L1 Tracks, GPU", run5_metrics_l1t_gpu))
])

fig.update_layout(
    title = "Productivity of DAQ Systems by Inner Tracker Data Reduction",
    xaxis_title = "Inner Tracker Data Reduction (%)",
    yaxis_title = "Productivity (Relevant Samples/kJ)",
    width = 800,
    height = 600,
)

# Caption placed in paper coordinates, below and to the left of the plot area.
fig.add_annotation(
    text = "Pileup = 200<br>L1T Rejection = 53:1",
    x = -0.1,
    y = -0.15,
    xref = "paper",
    yref = "paper",
    showarrow = False,
    font = {"size": 14},
)
fig.show()

In [32]:
# Power plot: density-normalised DAQ power against the percentage of Inner Tracker data
# removed. One trace per system configuration.
fig = go.Figure(data = [
    go.Scatter(x = (1 - it_reductions) * 100,
               y = series["power"],
               name = label)
    for label, series in (("Baseline", run5_metrics),
                          ("+GPU", run5_metrics_gpu),
                          ("+L1 Tracks, GPU", run5_metrics_l1t_gpu))
])

fig.update_layout(
    title = "Total DAQ Power by Inner Tracker Data Reduction",
    xaxis_title = "Inner Tracker Data Reduction (%)",
    yaxis_title = "DAQ Power (W)",
    width = 800,
    height = 600,
)

# Caption placed in paper coordinates, below and to the left of the plot area.
fig.add_annotation(
    text = "Pileup = 200<br>L1T Rejection = 53:1",
    x = -0.1,
    y = -0.15,
    xref = "paper",
    yref = "paper",
    showarrow = False,
    font = {"size": 14},
)
fig.show()