In [1]:
import uproot, numpy as np, awkward as ak
from glob import glob
from tqdm import tqdm
from collections import defaultdict
import awkward as ak
import os, pathlib
import ROOT
%load_ext autoreload
%autoreload 2

from utils import *


# ── ROOT → Parquet dump ──────────────────────────────────────────────────
# Reads every TTree found under the "ticlDumper" directory of the first
# MAX_FILES ROOT files, concatenates each tree across files, and writes one
# Parquet file per tree into ./parquet_out.
inputFolder = "/eos/cms/store/group/dpg_hgcal/comm_hgcal/wredjeb/ForGNN/MultiParticle_5_10_0PU/CMSSW_15_1_X/D110/CloseByMP_ForGNN_0PU/histo"
# glob() returns paths in filesystem-dependent order; sort so that the
# "first MAX_FILES" subset below is the same on every run.
fileList    = sorted(glob(f"{inputFolder}/*.root"))
print("Found", len(fileList), "ROOT files")

MAX_FILES = 10  # number of ROOT files to read

# tree name (cycle suffix stripped) -> list of per-file awkward arrays
tmp_store = defaultdict(list)

for f in tqdm(fileList[:MAX_FILES], desc="reading"):
    # Put the file itself under `with` so it is closed even when the
    # "ticlDumper" lookup raises.
    with uproot.open(f) as root_file:
        tdir = root_file["ticlDumper"]
        for name, obj in tdir.items():
            if isinstance(obj, uproot.behaviors.TTree.TTree):
                # keys look like "<tree>;<cycle>" — keep only the tree name
                key = name.split(";")[0]
                tmp_store[key].append(obj.arrays(library="ak"))

dump_data = {name: ak.concatenate(arr_list) for name, arr_list in tmp_store.items()}
print("Trees read:", list(dump_data))


outdir = pathlib.Path("parquet_out")
outdir.mkdir(exist_ok=True)

for name, arr in dump_data.items():
    path = outdir / f"{name}.parquet"
    ak.to_parquet(arr, path, compression=None)
    print("Wrote", path)


Found 108 ROOT files


reading: 100%|██████████| 10/10 [03:16<00:00, 19.68s/it]


Trees read: ['clusters', 'ticlTrackstersCLUE3DHigh', 'ticlTracksterLinks', 'ticlCandidate', 'ticlTracksterLinksSuperclusteringDNN', 'simtrackstersSC', 'simtrackstersCP', 'candidates', 'TICLGraph', 'associations', 'simTICLCandidate']
Wrote parquet_out/clusters.parquet
Wrote parquet_out/ticlTrackstersCLUE3DHigh.parquet
Wrote parquet_out/ticlTracksterLinks.parquet
Wrote parquet_out/ticlCandidate.parquet
Wrote parquet_out/ticlTracksterLinksSuperclusteringDNN.parquet
Wrote parquet_out/simtrackstersSC.parquet
Wrote parquet_out/simtrackstersCP.parquet
Wrote parquet_out/candidates.parquet
Wrote parquet_out/TICLGraph.parquet
Wrote parquet_out/associations.parquet
Wrote parquet_out/simTICLCandidate.parquet


In [4]:
# Use this cell if the files have already been dumped to Parquet format
import uproot, numpy as np, awkward as ak
from glob import glob
from tqdm import tqdm
from collections import defaultdict
import awkward as ak
import os, pathlib
import ROOT
%load_ext autoreload
%autoreload 2

from utils import *

# Reload the per-tree Parquet files written by the dump step into a dict
# keyed by file stem (i.e. the tree name).
parq_dir   = pathlib.Path("parquet_out")
# Materialise and sort the glob so the listing is reusable and deterministic.
parq_files = sorted(parq_dir.glob("*.parquet"))

dump_data = {p.stem: ak.from_parquet(p) for p in parq_files}

# ── usage ────────────────────────────────────────────────────────────────
ticlTrackstersCLUE3D = dump_data["ticlTrackstersCLUE3DHigh"]
ticlCandidate = dump_data["ticlCandidate"]
simTrackstersCP = dump_data["simtrackstersCP"]
associations = dump_data["associations"]
# Sanity check: both arrays are expected to have the same length. Report the
# two lengths on failure instead of raising an empty AssertionError.
assert len(associations) == len(simTrackstersCP), (
    f"length mismatch: associations has {len(associations)} entries, "
    f"simtrackstersCP has {len(simTrackstersCP)}"
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


AssertionError: 

In [8]:
from tqdm import tqdm

# Score the unmerged ticlCandidate tracksters against the sim tracksters,
# event by event, then aggregate the per-event tables into the inputs of the
# efficiency / merge / fake plots.
N_EVENTS = 10          # maximum number of events to process
SCORE_THRESHOLD = 0.6  # forwarded to the aggregate_* helpers

# One entry per processed event; the three lists are kept index-aligned.
sim_to_reco_tables = []
reco_to_sim_tables = []
all_merged_reco = []

rng = random.Random(123)  # not used in this cell (no random merging here)
# Never index past the end of either collection.
n_events = min(N_EVENTS, len(ticlCandidate), len(simTrackstersCP))
for ev in tqdm(range(n_events)):
    reco_ev = ticlCandidate[ev]
    sim_ev  = simTrackstersCP[ev]
    if len(sim_ev.raw_energy) == 0:
        # No sim tracksters: store empty placeholders in all three lists so
        # that list index == event index still holds.
        # NOTE(review): assumes the aggregate_* helpers accept empty
        # per-event entries — confirm against utils.
        sim_to_reco_tables.append([])
        reco_to_sim_tables.append([])
        all_merged_reco.append([])
        continue
    merged_reco = [Trackster.from_event_all(reco_ev, i) for i in range(len(reco_ev.raw_energy))]
    sim_to_reco_tables.append(
        compute_sim_to_reco_scores(sim_ev, merged_reco, best_only=True)
    )
    reco_to_sim_tables.append(
        compute_reco_to_sim_scores(sim_ev, merged_reco, best_only=False)
    )
    # Append exactly once per event so this list matches reco_to_sim_tables.
    all_merged_reco.append(merged_reco)

data_efficiency = aggregate_for_efficiency(simTrackstersCP, sim_to_reco_tables,
                           score_threshold=SCORE_THRESHOLD)
data_merged = aggregate_for_merge(all_merged_reco, reco_to_sim_tables,
                           score_threshold=SCORE_THRESHOLD)
data_fake = aggregate_for_fake(all_merged_reco, reco_to_sim_tables,
                           score_threshold=SCORE_THRESHOLD)


100%|██████████| 10/10 [00:08<00:00,  1.21it/s]


In [None]:
#My Algo

# ------------------------------------------------------------
# random group merge of Reco tracksters in one event
# ------------------------------------------------------------
def random_group_merging(event_rec, *, max_group_size=3, rng=None):
    """Randomly partition the tracksters of one event and merge each group.

    The trackster indices ``0 .. len(event_rec.raw_energy) - 1`` are shuffled
    and then cut into consecutive chunks whose sizes are drawn uniformly from
    ``1 .. max_group_size`` (the last chunk is truncated to what is left).
    Every index is built into a ``Trackster`` via ``Trackster.from_event_all``
    and the members of a chunk are folded together with ``.merge``.

    Parameters
    ----------
    event_rec : record exposing ``raw_energy``; its length is the number of
        tracksters in the event.
    max_group_size : int, keyword-only
        Upper bound (inclusive) for the size of a group.
    rng : object with ``shuffle`` and ``randint``, optional
        Random source; the ``random`` module is used when omitted.

    Returns
    -------
    list
        One (possibly merged) trackster per group; empty for an empty event.
    """
    source = random if rng is None else rng

    n_tracksters = len(event_rec.raw_energy)
    if n_tracksters == 0:
        return []

    order = list(range(n_tracksters))
    source.shuffle(order)

    # Draw all chunk sizes first, then build the tracksters.
    chunks = []
    start = 0
    while start < n_tracksters:
        size = min(source.randint(1, max_group_size), n_tracksters - start)
        chunks.append(order[start:start + size])
        start += size

    merged = []
    for chunk in chunks:
        members = [Trackster.from_event_all(event_rec, idx) for idx in chunk]
        # reduce() on a single-element list returns that element untouched,
        # so singleton groups need no special case.
        merged.append(functools.reduce(lambda acc, nxt: acc.merge(nxt), members))
    return merged


In [None]:
from tqdm import tqdm

# Score randomly merged CLUE3D tracksters against the sim tracksters, event
# by event, then aggregate the per-event tables into the inputs of the
# efficiency / merge / fake plots.
N_EVENTS = 10          # maximum number of events to process
SCORE_THRESHOLD = 0.6  # forwarded to the aggregate_* helpers

# One entry per processed event; the three lists are kept index-aligned.
sim_to_reco_tables = []
reco_to_sim_tables = []
all_merged_reco = []

rng = random.Random(123)  # fixed seed so the random grouping is reproducible
# Never index past the end of either collection.
n_events = min(N_EVENTS, len(ticlTrackstersCLUE3D), len(simTrackstersCP))
for ev in tqdm(range(n_events)):
    reco_ev = ticlTrackstersCLUE3D[ev]
    sim_ev  = simTrackstersCP[ev]
    if len(sim_ev.raw_energy) == 0:
        # No sim tracksters: store empty placeholders in all three lists so
        # that list index == event index still holds.
        # NOTE(review): assumes the aggregate_* helpers accept empty
        # per-event entries — confirm against utils.
        sim_to_reco_tables.append([])
        reco_to_sim_tables.append([])
        all_merged_reco.append([])
        continue
    merged_reco = random_group_merging(reco_ev, rng=rng)
    sim_to_reco_tables.append(
        compute_sim_to_reco_scores(sim_ev, merged_reco, best_only=True)
    )
    reco_to_sim_tables.append(
        compute_reco_to_sim_scores(sim_ev, merged_reco, best_only=False)
    )
    all_merged_reco.append(merged_reco)

data_efficiency = aggregate_for_efficiency(simTrackstersCP, sim_to_reco_tables,
                           score_threshold=SCORE_THRESHOLD)
data_merged = aggregate_for_merge(all_merged_reco, reco_to_sim_tables,
                           score_threshold=SCORE_THRESHOLD)
data_fake = aggregate_for_fake(all_merged_reco, reco_to_sim_tables,
                           score_threshold=SCORE_THRESHOLD)


In [None]:
# Plot the efficiency, merge and fake metrics together.
# The out_file argument can be omitted if no output file is needed.
plot_all_metrics(data_efficiency, data_merged, data_fake, out_file="ticl_metrics_grid.pdf")