In [1]:
from systemflow.graph import *

# np and functools are used below; import them explicitly instead of relying on the star import
import functools

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import plotly.graph_objects as go

In [2]:
from copy import deepcopy

In [3]:
# load data from the spreadsheet which defines the structure of the workflow,
# as well as the parameters for data rates, efficiency, data reduction, and classifier performance
run3_system = dataframes_from_spreadsheet("cms_system_60.xlsx")
run5_system = dataframes_from_spreadsheet("cms_system_200.xlsx")

In [4]:
run5_system.detectors

Unnamed: 0,Category,Detector,Data (bytes),Sample Rate,Compression,Link Efficiency (J/bit),Op Efficiency (J/op),PU 200
0,Tracking,Inner Tracker,1440000,40000000,0,2.22e-11,0,1.44
1,Tracking,Outer Tracker PS,720000,40000000,0,2.22e-11,0,0.72
2,Tracking,Outer Tracker 2S,430000,40000000,0,2.22e-11,0,0.43
3,Tracking,Track Finder TPG,10000,40000000,0,2.22e-11,0,0.01
4,Timing,MIP Timing BTL,240000,40000000,0,2.22e-11,0,0.24
5,Timing,MIP Timing ETL,440000,40000000,0,2.22e-11,0,0.44
6,Calorimetry,ECAL Barrel,600000,40000000,0,2.22e-11,0,0.6
7,Calorimetry,HCAL Barrel,240000,40000000,0,2.22e-11,0,0.24
8,Calorimetry,HCAL HO,30000,40000000,0,2.22e-11,0,0.03
9,Calorimetry,HCAL HF,60000,40000000,0,2.22e-11,0,0.06


In [5]:
#import the data predicting wall time scaling by pileup
scaling = pd.read_excel("wall time scaling.xlsx", sheet_name="Data")
#fit a polynomial to this data for CPU and GPU runtimes
def fit_poly(x, k3, k2, k1):
    """Evaluate the cubic k3*x**3 + k2*x**2 + k1*x (there is no constant term).

    x may be a scalar or anything supporting ``**`` and ``*`` element-wise;
    k3, k2 and k1 are the coefficients fitted by ``curve_fit``.
    """
    cubic_term = k3 * x ** 3
    quadratic_term = k2 * x ** 2
    linear_term = k1 * x
    return cubic_term + quadratic_term + linear_term
k, cv = curve_fit(fit_poly, scaling["Size"], scaling["Wall Time"])
k_gpu, cv_gpu = curve_fit(fit_poly, scaling["Size"], scaling["Wall Time GPU"])

In [6]:
#define a dictionary with functions defining the scaling of trigger runtimes with incoming data
funcs = {"Global": lambda x: fit_poly(x, *k), "Intermediate": lambda x: x / 2.0e6}
funcs_gpu = {"Global": lambda x: fit_poly(x, *k_gpu), "Intermediate": lambda x: x / 2.0e6}

In [7]:
# Run 3 baseline built from the "cms_system_60.xlsx" tables. This must be executed:
# later cells reference baseline_r3 and fail with NameError on a fresh kernel otherwise.
baseline_r3 = construct_graph(run3_system.detectors, run3_system.processors, run3_system.globals, funcs)

In [8]:
np.max([1.0, 2.0])

np.float64(2.0)

In [9]:
baseline = construct_graph(run5_system.detectors, run5_system.processors, run5_system.globals, funcs)

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  fit = lambda l: np.abs(self.egamma_rate - quad(lambda x: self.exp_dist(x, l) * interpolator(x), np.min(xs), np.max(xs))[0])
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  trigger_rate = lambda l: quad(lambda x: exp_dist(x, l) * efficiency_fit(x), np.min(xs2), np.max(xs2))[0]


In [10]:
baseline.nodes["Intermediate"]["routing latency"]

9e-07

In [12]:
baseline.nodes["Intermediate"]

{'type': 'processor',
 'reduction ratio': np.float64(53.3),
 'classifier': <systemflow.classifier.L1TClassifier at 0x121e19ed0>,
 'data reduction': np.float64(1.0),
 'op efficiency': np.float64(0.003),
 'op latency': 1e-09,
 'routing latency': 9e-07,
 'sample data': np.int64(260000),
 'complexity': <function __main__.<lambda>(x)>,
 'parallelism': <function systemflow.graph.processors.<locals>.<lambda>(x)>,
 'global ratio': np.float64(5330.0),
 'message size': np.float64(8425000.0),
 'ops': np.float64(4.2125),
 'input rate': np.int64(39999999),
 'error matrix': array([[0.98126, 0.51054],
        [0.01874, 0.48946]]),
 'contingency': array([[39243035,     3831],
        [  749459,     3672]]),
 'discards': array([39243035,     3831]),
 'output rate': np.int64(753131),
 'energy': np.float64(0.012637500000000001),
 'power': np.float64(505499.98736250005)}

In [18]:
def propagate_latency(graph, node):
    """
    Walk upstream from `node` and store a "message_time" attribute on every node visited.

    For a node without predecessors, "message_time" is its own processing latency:
    "op latency" * parallelism(ops). For any other node it is that processing
    latency, plus the largest "routing latency" among its direct predecessors,
    plus the largest "message_time" among its direct predecessors.

    The graph's node attribute dicts are modified in place; nothing is returned.

    NOTE(review): the worst routing latency and the worst upstream message time are
    maximised independently, so they may come from different predecessors -- confirm
    this is the intended (upper-bound) model.
    NOTE(review): a node reachable through several paths is re-evaluated once per path.
    """
    def slowest_link(sources):
        # largest routing latency among the given upstream nodes (0.0 if there are none)
        routing = [graph.nodes[src]["routing latency"] for src in sources]
        return np.max(routing) if routing else 0.0

    def visit(name):
        parents = list(graph.predecessors(name))
        attrs = graph.nodes[name]
        compute_time = attrs["op latency"] * attrs["parallelism"](attrs["ops"])

        total = compute_time
        if parents:
            # routing term is evaluated before recursing, then the slowest upstream branch is added
            total = compute_time + slowest_link(parents) + np.max([visit(p) for p in parents])

        attrs["message_time"] = total
        return total

    visit(node)

    return

In [19]:
propagate_latency(baseline, "Disk")

In [20]:
baseline.nodes["Intermediate"]["message_time"]

np.float64(1.803e-06)

In [21]:
baseline.nodes["Disk"]["message_time"]

np.float64(3.605e-06)

In [11]:
baseline.nodes["Intermediate"]["energy"] * np.sum(baseline.nodes["Intermediate"]["discards"])

np.float64(496013.5974375)

In [12]:
a1 = list(baseline.predecessors("Intermediate"))

In [13]:
a1

['Tracking', 'Timing', 'Calorimetry', 'Muon']

In [14]:
list(baseline.successors("Intermediate"))

['Global']

In [15]:
"Intermediate" in baseline.nodes.keys()

True

In [16]:
"""
Return the amount of energy expended by the system to reach the current node
"""
def upstream_energy(graph, node):
    def get_energy(node):
        if "energy" in node.keys():
            return node["energy"]
        else:
            return 0.0
    
    def traverse(start):
        up = list(graph.predecessors(start))

        if len(up) == 0:
            return get_energy(graph.nodes[start])
        else:
            return get_energy(graph.nodes[start]) + functools.reduce(lambda x, y: x + y, map(traverse, up))
    
    return traverse(node)

In [17]:
upstream_energy(baseline, "Intermediate")

np.float64(0.012637500000000001)

In [18]:
baseline.graph["Root Node"]

'Disk'

In [19]:
baseline.nodes

NodeView(('Inner Tracker', 'Outer Tracker PS', 'Outer Tracker 2S', 'Track Finder TPG', 'MIP Timing BTL', 'MIP Timing ETL', 'ECAL Barrel', 'HCAL Barrel', 'HCAL HO', 'HCAL HF', 'HGCAL', 'HGCAL TPG Stage1', 'HGCAL TPG Stage2', 'Muon DT', 'Muon CSC', 'Muon GEM GE1', 'Muon GEM GE2', 'Muon GEM ME0', 'Muon RPC', 'Tracking', 'Timing', 'Calorimetry', 'Muon', 'Intermediate', 'Global', 'Disk'))

In [20]:
upstream_energy(baseline, "Disk")

np.float64(446.05484938484136)

In [21]:
baseline.nodes["Inner Tracker"]["global ratio"]

np.float64(5330.0)

In [22]:
(5330 * 8.3) / 446

99.19058295964128

In [23]:
5330 * 8.3

44239.00000000001

In [24]:
baseline.nodes["Global"]["discards"]

array([742442,    694])

In [25]:
baseline.nodes["Intermediate"]["energy"] * baseline.nodes["Intermediate"]["output rate"]

np.float64(9486.364650000001)

In [27]:
gpu = construct_graph(run5_system.detectors, run5_system.processors, run5_system.globals, funcs_gpu)

  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  fit = lambda l: np.abs(self.egamma_rate - quad(lambda x: self.exp_dist(x, l) * interpolator(x), np.min(xs), np.max(xs))[0])
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  trigger_rate = lambda l: quad(lambda x: exp_dist(x, l) * efficiency_fit(x), np.min(xs2), np.max(xs2))[0]


In [28]:
gpu.nodes["Disk"]

{'type': 'storage',
 'reduction ratio': np.float64(1.0),
 'classifier': <systemflow.classifier.DummyClassifier at 0x11b89ee60>,
 'data reduction': np.float64(1.0),
 'op efficiency': np.float64(0.0),
 'sample data': np.int64(0),
 'complexity': <function systemflow.graph.processors.<locals>.<lambda>(x)>,
 'global ratio': np.float64(1.0),
 'message size': np.float64(8425000.0),
 'ops': np.float64(8425000.0),
 'input rate': np.int64(7485),
 'error matrix': array([[0., 0.],
        [1., 1.]]),
 'contingency': array([[   0,    0],
        [4542, 2943]]),
 'discards': array([0, 0]),
 'output rate': np.int64(7485),
 'energy': np.float64(0.0),
 'power': np.float64(0.0)}

In [29]:
baseline_2 = deepcopy(baseline)
baseline_2.nodes["Intermediate"]["reduction ratio"] = 400
baseline_2 = update_throughput(baseline_2)


In [30]:
l1t = deepcopy(baseline)
l1t.nodes["Intermediate"]["classifier"].skill_boost = 0.40
l1t = update_throughput(l1t)

In [31]:
smpx = deepcopy(baseline)
smpx.nodes["Inner Tracker"]["sample data"] *= (1 - 0.54)
smpx = update_throughput(smpx)

In [32]:
gpu_smpx = deepcopy(gpu)
gpu_smpx.nodes["Inner Tracker"]["sample data"] *= (1 - 0.54)
gpu_smpx = update_throughput(gpu_smpx)

In [33]:
gpu_l1t = deepcopy(gpu)
gpu_l1t.nodes["Intermediate"]["classifier"].skill_boost = 0.40
gpu_l1t = update_throughput(gpu_l1t)

In [34]:
smpx_l1t = deepcopy(baseline)
smpx_l1t.nodes["Inner Tracker"]["sample data"] *= (1 - 0.54)
smpx_l1t.nodes["Intermediate"]["classifier"].skill_boost = 0.40
smpx_l1t = update_throughput(smpx_l1t)

In [35]:
gpu_smpx_l1t = deepcopy(gpu)
gpu_smpx_l1t.nodes["Intermediate"]["classifier"].skill_boost = 0.40
gpu_smpx_l1t.nodes["Inner Tracker"]["sample data"] *= (1 - 0.54)
gpu_smpx_l1t = update_throughput(gpu_smpx_l1t)

In [36]:
np.sum(gpu.graph["performance"][:,1])

np.int64(7502)

In [37]:
gpu.nodes["Intermediate"]

{'type': 'processor',
 'reduction ratio': np.float64(53.3),
 'classifier': <systemflow.classifier.L1TClassifier at 0x11b7a0100>,
 'data reduction': np.float64(1.0),
 'op efficiency': np.float64(0.003),
 'sample data': np.int64(260000),
 'complexity': <function __main__.<lambda>(x)>,
 'global ratio': np.float64(5330.0),
 'message size': np.float64(8425000.0),
 'ops': np.float64(4.2125),
 'input rate': np.int64(39999999),
 'error matrix': array([[0.98132, 0.51684],
        [0.01868, 0.48316]]),
 'contingency': array([[39245435,     3878],
        [  747059,     3625]]),
 'discards': array([39245435,     3878]),
 'output rate': np.int64(750684),
 'energy': np.float64(0.012637500000000001),
 'power': np.float64(505499.98736250005)}

In [38]:
has_classifier(gpu.nodes["Intermediate"])

True

In [39]:
ac = active_classifiers(gpu)

In [40]:
[downstream_classifier(gpu, c) for c in ac]

[True, False]

In [41]:
gpu.nodes["Intermediate"]["contingency"]

array([[39245435,     3878],
       [  747059,     3625]])

In [42]:
gpu.nodes["Global"]["contingency"]

array([[742516,    681],
       [  4542,   2943]])

In [43]:
pipeline_contingency(gpu)

array([[39987951,     4559],
       [    4542,     2943]])

In [44]:
precision(gpu.nodes["Intermediate"]["contingency"])

np.float64(0.004828929349766346)

In [45]:
recall(gpu.nodes["Intermediate"]["contingency"])

np.float64(0.4831400773024124)

In [46]:
gpu.graph

{'globals':    Year
 0  2032,
 'Root Node': 'Disk',
 'link power': np.float64(124601.67313200001),
 'op power': np.float64(167923874.64119467),
 'performance': array([[39987951,     4559],
        [    4542,     2943]])}

In [47]:
def extract_results(graph, year=2032):
    """
    Summarise one system graph as a (power, precision, recall, f1, productivity) tuple.

    Parameters
    ----------
    graph : object whose ``graph`` attribute dict provides the "op power",
        "link power" and "performance" entries.
    year : argument passed to ``density_scale_model``; the summed power is divided
        by its result. Defaults to 2032, the value that used to be hard-coded.

    Returns
    -------
    tuple
        power : (op power + link power) / density_scale_model(year)
        prec  : precision(confusion)
        rec   : recall(confusion)
        f1    : f1_score(confusion)
        prod  : f1 * sum(get_passed(confusion)) / power
        where ``confusion`` is the graph's "performance" entry.
    """
    total_power = graph.graph["op power"] + graph.graph["link power"]
    power = total_power / density_scale_model(year)
    confusion = graph.graph["performance"]
    # this value is the classifier precision (it was previously held in a local named `acc`)
    prec = precision(confusion)
    rec = recall(confusion)
    f1 = f1_score(confusion)
    # productivity: F1-weighted sum of get_passed(confusion) per unit of scaled power
    prod = f1 * np.sum(get_passed(confusion)) / power

    return power, prec, rec, f1, prod

In [48]:
conditions = [baseline_r3, baseline_2, baseline]

In [49]:
pileup = np.array([60, 200, 200])[:,np.newaxis]
rejection = np.array([400, 400, 53])[:,np.newaxis]

In [50]:
pileup.shape

(3, 1)

In [51]:
cond_results = np.stack([extract_results(g) for g in conditions])

In [52]:
cond_results = np.concatenate((pileup, rejection, cond_results), axis=1)

In [53]:
# Columns: the two condition descriptors (pileup, rejection) followed by the five values
# returned by extract_results, in order.
# NOTE(review): the "Accuracy (%)" column is filled with the output of precision(), and the
# metric values displayed below look like fractions rather than percentages -- confirm the labels.
df2 = pd.DataFrame(cond_results, columns = ["Pileup", "L1T Reduction Ratio", "Power (W)", "Accuracy (%)", "Recall (%)", "F1 Score (%)", "Productivity (Relevant Samples/J)"])

In [54]:
df2

Unnamed: 0,Pileup,L1T Reduction Ratio,Power (W),Accuracy (%),Recall (%),F1 Score (%),Productivity (Relevant Samples/J)
0,60.0,400.0,322348.1,0.224346,0.223447,0.223896,0.00069
1,200.0,400.0,6978288.0,0.22333,0.224449,0.223888,3.2e-05
2,200.0,53.0,51611500.0,0.38568,0.386297,0.385988,5.6e-05


In [55]:
extract_results(baseline_2)[-1] * 1000

np.float64(0.03217977057509017)

In [56]:
extract_results(baseline)[-1] * 1000

np.float64(0.05619514953090941)

In [57]:
extract_results(baseline_r3)[-1] * 1000

np.float64(0.6904096578525566)

In [58]:
extract_results(baseline_2)[-1] * 1000

np.float64(0.03217977057509017)

In [59]:
extract_results(baseline)[-1] * 1000

np.float64(0.05619514953090941)

In [60]:
all_graphs = [baseline_r3, baseline, gpu, l1t, smpx, gpu_l1t, smpx_l1t, gpu_smpx, gpu_smpx_l1t]

In [61]:
pileup = np.array([[60, 200, 200, 200, 200, 200, 200, 200, 200,],])
rejection = np.array([[400, 53, 53, 53, 53, 53, 53, 53, 53],])
has_gpu = [False, False, True, False, False, True, False, True, True]
has_smpx = [False, False, False, False, True, False, True, True, True]
has_l1t = [False, False, False, True, False, True, True, False, True]

In [62]:
results = np.stack([extract_results(g) for g in all_graphs])

In [63]:
results

array([[3.22348053e+05, 2.24346076e-01, 2.23446894e-01, 2.23895582e-01,
        6.90409658e-04],
       [5.16114995e+07, 3.85680064e-01, 3.86296987e-01, 3.85988279e-01,
        5.61951495e-05],
       [2.58553201e+07, 3.93186373e-01, 3.92295388e-01, 3.92740375e-01,
        1.13696589e-04],
       [5.16508912e+07, 7.94995316e-01, 7.91922154e-01, 7.93455760e-01,
        1.14799469e-04],
       [4.08443906e+07, 3.85680064e-01, 3.86296987e-01, 3.85988279e-01,
        7.10089166e-05],
       [2.58743294e+07, 1.00000000e+00, 7.98053852e-01, 8.87686263e-01,
        2.05399629e-04],
       [4.08755559e+07, 7.94995316e-01, 7.91922154e-01, 7.93455760e-01,
        1.45062122e-04],
       [2.04670348e+07, 3.93186373e-01, 3.92295388e-01, 3.92740375e-01,
        1.43629096e-04],
       [2.04820742e+07, 1.00000000e+00, 7.98053852e-01, 8.87686263e-01,
        2.59474583e-04]])

In [64]:
pileup.shape

(1, 9)

In [65]:
rejection.shape

(1, 9)

In [66]:
results = np.concatenate((pileup, rejection, np.transpose(results)), axis=0)

In [67]:
results

array([[6.00000000e+01, 2.00000000e+02, 2.00000000e+02, 2.00000000e+02,
        2.00000000e+02, 2.00000000e+02, 2.00000000e+02, 2.00000000e+02,
        2.00000000e+02],
       [4.00000000e+02, 5.30000000e+01, 5.30000000e+01, 5.30000000e+01,
        5.30000000e+01, 5.30000000e+01, 5.30000000e+01, 5.30000000e+01,
        5.30000000e+01],
       [3.22348053e+05, 5.16114995e+07, 2.58553201e+07, 5.16508912e+07,
        4.08443906e+07, 2.58743294e+07, 4.08755559e+07, 2.04670348e+07,
        2.04820742e+07],
       [2.24346076e-01, 3.85680064e-01, 3.93186373e-01, 7.94995316e-01,
        3.85680064e-01, 1.00000000e+00, 7.94995316e-01, 3.93186373e-01,
        1.00000000e+00],
       [2.23446894e-01, 3.86296987e-01, 3.92295388e-01, 7.91922154e-01,
        3.86296987e-01, 7.98053852e-01, 7.91922154e-01, 3.92295388e-01,
        7.98053852e-01],
       [2.23895582e-01, 3.85988279e-01, 3.92740375e-01, 7.93455760e-01,
        3.85988279e-01, 8.87686263e-01, 7.93455760e-01, 3.92740375e-01,
        8.8

In [68]:
# One row per graph in all_graphs: pileup, rejection, then the five values from extract_results.
# NOTE(review): the "Accuracy (%)" column is filled with the output of precision(), and the
# metric values displayed below look like fractions rather than percentages -- confirm the labels
# before this table is exported.
df = pd.DataFrame(results.transpose(), columns = ["Pileup", "L1T Reduction Ratio", "Power (W)", "Accuracy (%)", "Recall (%)", "F1 Score (%)", "Productivity (Relevant Samples/J)"])

In [69]:
df["GPU HLT"] = has_gpu
df["L1T Tracking"] = has_l1t
df["Smart Sensors"] = has_smpx

In [70]:
df

Unnamed: 0,Pileup,L1T Reduction Ratio,Power (W),Accuracy (%),Recall (%),F1 Score (%),Productivity (Relevant Samples/J),GPU HLT,L1T Tracking,Smart Sensors
0,60.0,400.0,322348.1,0.224346,0.223447,0.223896,0.00069,False,False,False
1,200.0,53.0,51611500.0,0.38568,0.386297,0.385988,5.6e-05,False,False,False
2,200.0,53.0,25855320.0,0.393186,0.392295,0.39274,0.000114,True,False,False
3,200.0,53.0,51650890.0,0.794995,0.791922,0.793456,0.000115,False,True,False
4,200.0,53.0,40844390.0,0.38568,0.386297,0.385988,7.1e-05,False,False,True
5,200.0,53.0,25874330.0,1.0,0.798054,0.887686,0.000205,True,True,False
6,200.0,53.0,40875560.0,0.794995,0.791922,0.793456,0.000145,False,True,True
7,200.0,53.0,20467030.0,0.393186,0.392295,0.39274,0.000144,True,False,True
8,200.0,53.0,20482070.0,1.0,0.798054,0.887686,0.000259,True,True,True


In [71]:
df.iloc[1:]

Unnamed: 0,Pileup,L1T Reduction Ratio,Power (W),Accuracy (%),Recall (%),F1 Score (%),Productivity (Relevant Samples/J),GPU HLT,L1T Tracking,Smart Sensors
1,200.0,53.0,51611500.0,0.38568,0.386297,0.385988,5.6e-05,False,False,False
2,200.0,53.0,25855320.0,0.393186,0.392295,0.39274,0.000114,True,False,False
3,200.0,53.0,51650890.0,0.794995,0.791922,0.793456,0.000115,False,True,False
4,200.0,53.0,40844390.0,0.38568,0.386297,0.385988,7.1e-05,False,False,True
5,200.0,53.0,25874330.0,1.0,0.798054,0.887686,0.000205,True,True,False
6,200.0,53.0,40875560.0,0.794995,0.791922,0.793456,0.000145,False,True,True
7,200.0,53.0,20467030.0,0.393186,0.392295,0.39274,0.000144,True,False,True
8,200.0,53.0,20482070.0,1.0,0.798054,0.887686,0.000259,True,True,True


In [72]:
df.iloc[1:]["Productivity (Relevant Samples/J)"] * 1000

1    0.056195
2    0.113697
3    0.114799
4    0.071009
5    0.205400
6    0.145062
7    0.143629
8    0.259475
Name: Productivity (Relevant Samples/J), dtype: float64

In [73]:
df["Productivity (Relevant Samples/J)"] * 1e3

0    0.690410
1    0.056195
2    0.113697
3    0.114799
4    0.071009
5    0.205400
6    0.145062
7    0.143629
8    0.259475
Name: Productivity (Relevant Samples/J), dtype: float64

In [74]:
df.to_excel("experimental_table.xlsx", index=False)

In [75]:
density_scale_model(2032)

np.float64(6.499570514329353)