In [21]:
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import mplhep as hep
from coffea.util import load
import numpy as np
import pandas as pd
import awkward as ak
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from numba import njit
import vector
# Register vector's Numba extensions and its Awkward Array behaviors. The
# Awkward registration is what lets records zipped with
# `with_name='Momentum4D'` below behave as four-vectors.
vector.register_numba()
vector.register_awkward()

from pprint import pprint 
# Use mplhep's ROOT style for any matplotlib figure drawn in this notebook.
hep.style.use(hep.style.ROOT)

import hist

# Loading the exported dataset

In [22]:
# Read the exported coffea output; `df` is indexed by key in the cells below
# ("cutflow", "sumw", "columns").
input_path = "/eos/user/d/dvalsecc/www/ttHbbAnalysis/training_dataset/output_ttHTobb_2018_v7.coffea"
df = load(input_path)

In [23]:
# Show the "cutflow" entry of the loaded output: one count per selection step
# and sample (see the rendered output below).
df["cutflow"]

{'initial': {'ttHTobb': 9668000},
 'skim': {'ttHTobb': 1607845},
 'presel': {'ttHTobb': 763132},
 'semilep_LHE': {'ttHTobb': 569993}}

In [24]:
# Show the "sumw" entry of the loaded output, keyed by category and sample
# (presumably the sum of event weights -- confirm with the exporting processor).
df["sumw"]

{'semilep_LHE': {'ttHTobb': 929.9404434096181}}

In [25]:
# Exported columns for the ttHTobb sample in the "semilep_LHE" category.
cs = df["columns"]['ttHTobb']["semilep_LHE"]


def _col(name):
    """Return the array stored in `.value` of the exported column `name`."""
    return cs[name].value


def _zeros_like_col(name):
    """Return an array of zeros with the same shape as the column `name`."""
    return np.zeros(_col(name).shape)


def _momentum4d(fields, counts=None):
    """Zip `fields` into records named 'Momentum4D'.

    If `counts` is given, it is the name of a column whose values are passed
    to `ak.unflatten` to split the flat records into variable-length lists
    (presumably one list per event -- confirm with the exporter).
    """
    records = ak.zip(fields, with_name='Momentum4D')
    if counts is None:
        return records
    return ak.unflatten(records, _col(counts))


# All partons, with their pdgId and provenance code.
partons = _momentum4d(
    {
        "pt": _col("Parton_pt"),
        "eta": _col("Parton_eta"),
        "phi": _col("Parton_phi"),
        "mass": _col("Parton_mass"),
        "pdgId": _col("Parton_pdgId"),
        "prov": _col("Parton_provenance"),
    },
    counts="Parton_N",
)

# Partons from the "PartonMatched" columns.
partons_matched = _momentum4d(
    {
        "pt": _col("PartonMatched_pt"),
        "eta": _col("PartonMatched_eta"),
        "phi": _col("PartonMatched_phi"),
        "mass": _col("PartonMatched_mass"),
        "pdgId": _col("PartonMatched_pdgId"),
        "prov": _col("PartonMatched_provenance"),
    },
    counts="PartonMatched_N",
)

# Selected jets; no mass column is read, so "m" is filled with zeros.
jets = _momentum4d(
    {
        "pt": _col("JetGood_pt"),
        "eta": _col("JetGood_eta"),
        "phi": _col("JetGood_phi"),
        "btag": _col("JetGood_btagDeepFlavB"),
        "m": _zeros_like_col("JetGood_btagDeepFlavB"),
    },
    counts="JetGood_N",
)

# Jets from the "JetGoodMatched" columns. The provenance is taken from the
# PartonMatched columns, which therefore must have the same flat length.
jets_matched = _momentum4d(
    {
        "pt": _col("JetGoodMatched_pt"),
        "eta": _col("JetGoodMatched_eta"),
        "phi": _col("JetGoodMatched_phi"),
        "btag": _col("JetGoodMatched_btagDeepFlavB"),
        "prov": _col("PartonMatched_provenance"),
        "m": _zeros_like_col("PartonMatched_provenance"),
    },
    counts="JetGoodMatched_N",
)

# Generator-level info (ids and x values); plain records, not four-vectors.
generator_info = ak.zip(
    {
        "pdgid1": _col("Generator_id1"),
        "pdgid2": _col("Generator_id2"),
        "x1": _col("Generator_x1"),
        "x2": _col("Generator_x2"),
    }
)

# Parton-level leptons.
lepton_partons = _momentum4d(
    {
        "pt": _col("LeptonParton_pt"),
        "eta": _col("LeptonParton_eta"),
        "phi": _col("LeptonParton_phi"),
        "mass": _col("LeptonParton_mass"),
        "pdgId": _col("LeptonParton_pdgId"),
    },
    counts="LeptonParton_N",
)

# Reconstructed lepton: kept flat (no unflatten), with "m" set to zero.
lepton = _momentum4d(
    {
        "pt": _col("LeptonGood_pt"),
        "eta": _col("LeptonGood_eta"),
        "phi": _col("LeptonGood_phi"),
        "m": _zeros_like_col("LeptonGood_pt"),
    }
)

# MET: only pt and phi are read; eta and m are set to zero.
met = _momentum4d(
    {
        "pt": _col("MET_pt"),
        "eta": _zeros_like_col("MET_pt"),
        "phi": _col("MET_phi"),
        "m": _zeros_like_col("MET_pt"),
    }
)

# Parton-level Higgs, kept flat.
higgs = _momentum4d(
    {
        "pt": _col("HiggsParton_pt"),
        "eta": _col("HiggsParton_eta"),
        "phi": _col("HiggsParton_phi"),
        "m": _col("HiggsParton_mass"),
    }
)


In [26]:
# Entries with pt == -999 in the matched collections are placeholders (treated
# here as "no match"); replace them with None.
# `valid_when=False` says explicitly that entries where the condition is True
# are the ones to mask out. Passing None in that position only gave this
# result where None happened to be coerced to False, and is not a valid
# boolean for `valid_when`.
jets_matched = ak.mask(jets_matched, jets_matched.pt==-999, valid_when=False)
partons_matched = ak.mask(partons_matched, partons_matched.pt==-999, valid_when=False)
# Per-jet boolean: True where the matched-jet entry is not None after masking.
is_jet_matched = ~ak.is_none(jets_matched, axis=1)
jets = ak.with_field(jets, is_jet_matched, "matched")

# Attach the provenance to each jet, filling unmatched entries with -1.
jets = ak.with_field(jets, ak.fill_none(jets_matched.prov, -1), "prov")

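The `jets` and `partons_matched` arrays are **aligned**: they share the same per-event structure, and unmatched entries are `None` in `partons_matched`.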

In [28]:
# NOTE: `df` is rebound here from the loaded coffea output to the zipped
# record array, so the earlier cells that index `df[...]` cannot be re-run
# without loading the input file again.
event_fields = {
    "jets": jets,
    "partons_matched": partons_matched,
    "partons": partons,
    "generator_info": generator_info,
    "lepton_partons": lepton_partons,
    "lepton_reco": lepton,
    "met": met,
    "higgs": higgs,
}
# depth_limit=1 zips along the outermost axis only, so collections with
# different inner list lengths can sit side by side as fields of one record.
df = ak.zip(event_fields, depth_limit=1)

# Write the combined record array to a parquet file.
output_path = "/eos/user/d/dvalsecc/www/ttHbbAnalysis/training_dataset/all_jets_v7.parquet"
ak.to_parquet(df, output_path)

In [29]:
# Display the Higgs four-vector array as a quick sanity check (one entry per
# selected event, see the length in the rendered output below).
higgs

<MomentumArray4D [{pt: 59.2, eta: 1.37, ... m: 125}] type='569993 * Momentum4D["...'>