In [1]:
from ROOT import TFile
import pandas as pd


# Run configuration: a single dict that the later cells read from.
# TODO: validate that options["input_data"] exists before it is opened.
options = {
    "input_data": "/public/data/RNN/Samples/InclusivePt/small_data.root",
    "run_location": "/public/data/RNN/runs",
    "run_label": "anil_test",
    "tree_name": "NormalizedTree",
    "output_folder": "./Outputs/",
}

# Derived path: lives directly inside the output folder.
options["model_path"] = options["output_folder"] + "saved_model.pt"

options.update(
    # One of: RNN, LSTM, GRU, DeepSets, SetTransformer
    architecture_type="GRU",
    dropout=0.3,
    # One of: None, "high-to-low-pt", "low-to-high-pt", "near-to-far", "far-to-near"
    track_ordering="low-to-high-pt",
    # Extra names appended to the lepton feature list. Previously tried here:
    # "baseline_topoetcone20", "baseline_topoetcone30", "baseline_topoetcone40",
    # "baseline_eflowcone20", "baseline_ptcone20", "baseline_ptcone30",
    # "baseline_ptcone40", "baseline_ptvarcone20", "baseline_ptvarcone30",
    # "baseline_ptvarcone40"
    additional_appended_features=[],
    lr=0.001,
    # Branch names dropped when the list of usable branches is built.
    ignore_features=[
        "baseline_topoetcone20",
        "baseline_topoetcone30",
        "baseline_topoetcone40",
        "baseline_eflowcone20",
        "baseline_ptcone20",
        "baseline_ptcone30",
        "baseline_ptcone40",
        "baseline_ptvarcone20",
        "baseline_ptvarcone30",
        "baseline_ptvarcone40",
        "baseline_eflowcone20_over_pt",
        "trk_vtx_type",
    ],
    training_split=0.7,
    batch_size=32,
    n_epochs=10,
    n_layers=3,
    hidden_neurons=256,
    intrinsic_dimensions=1024,  # only matters for deep sets
    output_neurons=2,
    save_model=True,
)

# Second derived path, also inside the output folder.
options["model_save_path"] = options["output_folder"] + "test_gru_model.pth"
options["train_BDT"] = True


Welcome to JupyROOT 6.21/01


In [2]:
# Open the input ROOT file named in the options.
data_file = TFile(options["input_data"])
# A failed open can leave a "zombie" TFile instead of raising, which would only
# surface later as a confusing error on the tree lookup; fail loudly right here.
if data_file.IsZombie():
    raise OSError("could not open input_data file: " + options["input_data"])

# Look up the tree by its configured name.
data_tree = getattr(data_file, options["tree_name"])

# Keep every branch name that is not listed in options["ignore_features"].
options["branches"] = [
    branch_name
    for branch_name in (branch.GetName() for branch in data_tree.GetListOfBranches())
    if branch_name not in options["ignore_features"]
]

In [3]:
# Display the branch names that survived the ignore_features filter.
options["branches"]

['event_n',
 'pdgID',
 'truth_type',
 'baseline_ptcone20_over_pt',
 'baseline_ptcone30_over_pt',
 'baseline_ptcone40_over_pt',
 'baseline_ptvarcone20_over_pt',
 'baseline_ptvarcone30_over_pt',
 'baseline_ptvarcone40_over_pt',
 'baseline_topoetcone20_over_pt',
 'baseline_topoetcone30_over_pt',
 'baseline_topoetcone40_over_pt',
 'baseline_PLT',
 'lep_pT',
 'ROC_slicing_lep_pT',
 'lep_eta',
 'lep_theta',
 'lep_phi',
 'lep_d0',
 'lep_d0_over_sigd0',
 'lep_z0',
 'lep_dz0',
 'lep_has_associated_jet',
 'lep_DL1r',
 'trk_lep_dR',
 'trk_pT',
 'trk_eta',
 'trk_phi',
 'trk_d0',
 'trk_z0',
 'trk_lep_dEta',
 'trk_lep_dPhi',
 'trk_lep_dD0',
 'trk_lep_dZ0',
 'trk_chi2',
 'trk_charge',
 'trk_nIBLHits',
 'trk_nPixHits',
 'trk_nPixHoles',
 'trk_nPixOutliers',
 'trk_nSCTHits',
 'trk_nSCTHoles',
 'trk_nTRTHits',
 'trk_vtx_x',
 'trk_vtx_y',
 'trk_vtx_z',
 'calo_cluster_lep_dR',
 'calo_cluster_e',
 'calo_cluster_pT',
 'calo_cluster_eta',
 'calo_cluster_phi',
 'calo_cluster_lep_dEta',
 'calo_cluster_lep_dPhi']

In [9]:
# Split the selected branch names into feature groups according to name prefix.
options.update(
    {
        group_key: [
            branch_name
            for branch_name in options["branches"]
            if branch_name.startswith(prefix)
        ]
        for group_key, prefix in (
            ("baseline_features", "baseline_"),
            ("lep_features", "lep_"),
            ("trk_features", "trk_"),
            ("calo_features", "calo_cluster_"),
        )
    }
)
# The lepton group additionally carries any configured appended features.
options["lep_features"].extend(options["additional_appended_features"])

In [10]:
# Display the track feature names (the branches whose name starts with "trk_").
options["trk_features"]

['trk_lep_dR',
 'trk_pT',
 'trk_eta',
 'trk_phi',
 'trk_d0',
 'trk_z0',
 'trk_lep_dEta',
 'trk_lep_dPhi',
 'trk_lep_dD0',
 'trk_lep_dZ0',
 'trk_chi2',
 'trk_charge',
 'trk_nIBLHits',
 'trk_nPixHits',
 'trk_nPixHoles',
 'trk_nPixOutliers',
 'trk_nSCTHits',
 'trk_nSCTHoles',
 'trk_nTRTHits',
 'trk_vtx_x',
 'trk_vtx_y',
 'trk_vtx_z']

In [13]:
# Display the tree object that was looked up on the input file.
data_tree

<cppyy.gbl.TTree object at 0x55bb73777d10>

In [4]:
# Display the configured tree name.
options["tree_name"]

'NormalizedTree'

In [6]:
# Fetch the tree by name through TFile.Get.
# NOTE(review): the reprs recorded for `data_tree` and `tree` show the same
# address, so this looks like a second handle to the same tree - confirm and
# consider using a single name.
tree = data_file.Get(options["tree_name"])

In [15]:
# Display the tree handle.
tree

<cppyy.gbl.TTree object at 0x55bb73777d10>

In [18]:
# Load entry 200 so that attribute access on the tree reads that entry.
# The displayed integer is GetEntry's return value (presumably the number of
# bytes read - confirm against the ROOT TTree documentation).
tree.GetEntry(200)

1100

In [19]:
# Collect the current entry's value for every track feature, in
# options["trk_features"] order (one list element per feature).
# NOTE(review): each element appears to be the tree's own std::vector<float>
# object, so its contents may change after another GetEntry call - confirm.
transposed_tracks = [
    getattr(tree, feature_name) for feature_name in options["trk_features"]
]

In [43]:
# Inspect the object the tree exposes for the first track feature.
getattr(tree,options["trk_features"][0])

<cppyy.gbl.std.vector<float> object at 0x55bb73956690>

In [20]:
import numpy as np
# Flip the per-feature sequences so that the feature axis comes last
# (the recorded shape is (2, 22) for 22 track features).
tracks = np.asarray(transposed_tracks).T

In [21]:
# Show the array shape and the number of per-feature sequences, one per line.
print(tracks.shape, len(transposed_tracks), sep="\n")

(2, 22)
22


In [30]:
# Read every lepton feature of the current entry, replacing NaN values with 0.
lepton = [
    0 if np.isnan(feature_value) else feature_value
    for feature_value in (
        getattr(tree, feature_name) for feature_name in options["lep_features"]
    )
]
print(lepton)

[-0.4195576012134552, 0.8006266951560974, -0.9510858058929443, -0.30334019660949707, 0.20412613451480865, 0.36969390511512756, 0.44629958271980286, -0.04822413995862007, -1.9436091184616089, 0.20576323568820953]


In [36]:
# Display the truth_type value of the entry currently loaded in the tree.
tree.truth_type

6.0

In [37]:
# Load entry 100 so that attribute access on the tree reads that entry.
# The displayed integer is GetEntry's return value (presumably the number of
# bytes read - confirm against the ROOT TTree documentation).
tree.GetEntry(100)

1768

In [38]:
# Print the tree's string form (its name and title).
print (tree)

Name: NormalizedTree Title: normalized tree


In [46]:
# Re-check the shape of `tracks`.
# NOTE(review): this cell's execution count (In [46]) is out of order and the
# recorded shape differs from the earlier (2, 22); `tracks` was presumably
# rebuilt after a different entry was loaded - confirm with Restart & Run All.
print (tracks.shape)

(5, 22)


In [40]:
# Attempt to index the tree directly. This fails: a TTree does not support
# indexing (see the TypeError recorded below). Load an entry with
# tree.GetEntry(100) and then read the branch attributes instead.
tree[100]

TypeError: 'TTree' object does not support indexing