In [1]:
import itertools
import logging
from pathlib import Path
import numba as nb

import awkward as ak
import click
import h5py
import numpy as np
import vector

from src.data.cms.convert_to_h5 import MIN_JETS, N_JETS, N_FJETS

vector.register_awkward()

logging.basicConfig(level=logging.INFO)

In [2]:
pred_file = ""
test_file = "/Users/billyli/UCSD/hhh/data/delphes/v2/hhh_test.h5"

In [3]:
HIGGS_MASS = 125.0
# precompute possible jet assignments lookup table
MIN_JETS=6

JET_ASSIGNMENTS = {}
for nH in range(0, 1+3):
    JET_ASSIGNMENTS[nH] = {}
    for nj in range(nH*2, N_JETS + 1):
        a = list(itertools.combinations(range(nj), 2))
        b = np.array([ assignment for assignment in itertools.combinations(a, nH) if len(np.unique(assignment)) == nH*2])
        JET_ASSIGNMENTS[nH][nj] = b

FJET_ASSIGNMENTS = {}

In [4]:
JET_ASSIGNMENTS[3][6][:,:,0]

array([[0, 2, 4],
       [0, 2, 3],
       [0, 2, 3],
       [0, 1, 4],
       [0, 1, 3],
       [0, 1, 3],
       [0, 1, 4],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 3],
       [0, 1, 2],
       [0, 1, 2],
       [0, 1, 3],
       [0, 1, 2],
       [0, 1, 2]])

In [5]:
a = list(itertools.combinations(range(3), 2))

In [6]:
list(itertools.combinations(a, 3))

[((0, 1), (0, 2), (1, 2))]

In [7]:
len(set((0, 1)+(0, 2)+(1, 2)))

3

In [8]:
(0, 1)+(0, 2)+(1, 2)

(0, 1, 0, 2, 1, 2)

In [9]:
set((0, 1)+(0, 2)+(1, 2))

{0, 1, 2}

In [10]:
in_file = h5py.File(test_file)

In [11]:
in_file['INPUTS']['BoostedJets'].keys()

<KeysViewHDF5 ['MASK', 'fj_charge', 'fj_chargedenergyfrac', 'fj_cosphi', 'fj_ehadovereem', 'fj_eta', 'fj_mass', 'fj_ncharged', 'fj_neutralenergyfrac', 'fj_nneutral', 'fj_phi', 'fj_pt', 'fj_sdmass', 'fj_sinphi', 'fj_tau21', 'fj_tau32']>

In [12]:
in_file['INPUTS']['Jets'].keys()

<KeysViewHDF5 ['MASK', 'btag', 'cosphi', 'eta', 'flavor', 'mass', 'matchedfj', 'phi', 'pt', 'sinphi']>

In [13]:
# a function that loads jets from hhh_test.h5
def load_jets(in_file):
    # load jets from the h5
    pt = ak.Array(in_file["INPUTS"]["Jets"]["pt"])
    eta = ak.Array(in_file["INPUTS"]["Jets"]["eta"])
    phi = ak.Array(in_file["INPUTS"]["Jets"]["phi"])
    btag = ak.Array(in_file["INPUTS"]["Jets"]["btag"])
    mass = ak.Array(in_file["INPUTS"]["Jets"]["mass"])
    mask = ak.Array(in_file["INPUTS"]["Jets"]["MASK"])

    jets = ak.zip(
        {
            "pt": pt,
            "eta": eta,
            "phi": phi,
            "btag": btag,
            "mass": mass,
            "mask": mask
        },
        with_name="Momentum4D",
    )
    
    return jets

In [14]:
# a function that loads fat jets from hhh_test.h5
def load_fjets(in_file):
     # load fatjets from h5
    fj_pt = ak.Array(in_file["INPUTS"]["BoostedJets"]["fj_pt"])
    fj_eta = ak.Array(in_file["INPUTS"]["BoostedJets"]["fj_eta"])
    fj_phi = ak.Array(in_file["INPUTS"]["BoostedJets"]["fj_phi"])
    fj_mass = ak.Array(in_file["INPUTS"]["BoostedJets"]["fj_mass"])
    fj_mask = ak.Array(in_file["INPUTS"]["BoostedJets"]["MASK"])

    fjets = ak.zip(
        {
            "pt": fj_pt,
            "eta": fj_eta,
            "phi": fj_phi,
            'mass': fj_mass,
            'mask': fj_mask
        },
        with_name="Momentum4D"
    )
    
    return fjets

In [15]:
fjs = load_fjets(in_file)
fj_mask = fjs['mask']
fjmass_cond = (fjs['mass']>110) & (fjs['mass']<140)
fjpt_cond = fjs['pt']>300
fj_cond = fjmass_cond & fjpt_cond & fj_mask
fjs_selected = fjs[fj_cond]

fj_idx = ak.local_index(fjs)
bh_fj_idx = fj_idx[fj_cond]

In [16]:
js = load_jets(in_file)
js_idx = ak.local_index(js)

In [17]:
js[0][0]

In [18]:
fjs_selected[8].deltaR(js[0][0])

In [19]:
FJET_DR = 0.8

@nb.njit
def match_fjet_to_jet(fjets, jets, builder):
    for fjets_event, jets_event in zip(fjets, jets):
        builder.begin_list()
        for i, jet in enumerate(jets_event):
            match_idx = -1
            for j, fjet in enumerate(fjets_event):
                if jet.deltaR(fjet) < FJET_DR:
                    match_idx = j
            builder.append(match_idx)
        builder.end_list()

    return builder

In [20]:
matched_fj_idx = match_fjet_to_jet(fjs_selected, js, ak.ArrayBuilder()).snapshot()
js_unoverlapped = js[matched_fj_idx==-1]
idx_js_unoverlapped = js_idx[matched_fj_idx==-1]

In [21]:
js_unoverlapped
jcounts = ak.count(js_unoverlapped, axis=-1)

In [22]:
n_bhs_matched = ak.count(fjs_selected, axis=-1)
NrHs = (3-n_bhs_matched).to_numpy()

In [23]:
jet_assignments = [JET_ASSIGNMENTS[NrH_event][jcount_event] for jcount_event, NrH_event in zip(jcounts, NrHs)]

In [24]:
print(NrHs)

[3 3 3 ... 3 2 3]


In [25]:
# @nb.njit
def chi2_matching(js, jcounts, NrHs, jet_assignments, builder):
    HIGGS_MASS = 125
    predictions = []
    for js_e, jcount_e, ja_e, NrH_e in zip(js, jcounts, jet_assignments, NrHs):
        # print(NrH_e)
        builder.begin_list()
        j_b1 = js_e[ja_e[:,:,0]]
        j_b2 = js_e[ja_e[:,:,1]]
        mjj = (j_b1 + j_b2).mass
        chi2 = ak.sum(np.square(mjj - HIGGS_MASS), axis=-1)
        chi2_argmin = ak.argmin(chi2, axis=-1)
        
        prediction_e = [ja_e[:,i,:] for i in range(NrH_e)]
        predictions.append(prediction_e)

    return predictions

In [26]:
chi2_matching(js_unoverlapped, jcounts, NrHs, jet_assignments, ak.ArrayBuilder()).snapshot()

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

In [None]:
#js_b1 = js[]
# jet_assignments = ak.Array(jet_assignments)
jet_assignments[0][1]

In [None]:
idx_js_b1 = [ pair[0] for ev in jet_assignments for comb in ev for pair in comb ]
idx_js_b2 = [ pair[1] for ev in jet_assignments for comb in ev for pair in comb ]

In [None]:
def main(pred_file, test_file):
    # load input test h5 file
    in_file = h5py.File(test_file)
    
    jets = load_jets(in_file)
    fjets = load_fjets(in_file)
    
    # select fjets based on pT and mass
    
    
    # find ak4jets that matched to selected ak8jets (dR check)
    
    # remove overlapped ak4jets
    
    # assign ak4 jets to the remaining higgs by chi2
    
    # save all assignment to the h5file
    
    return
    
    

   


main(pred_file, test_file)


## code segments waiting

In [None]:

    
    # just consider top-6 jets
    nj = 6
    mjj = (jets[:, JET_ASSIGNMENTS[nj][:, :, 0]] + jets[:, JET_ASSIGNMENTS[nj][:, :, 1]]).mass
    chi2 = ak.sum(np.square(mjj - HIGGS_MASS), axis=-1)
    chi2_argmin = ak.argmin(chi2, axis=-1)

    h1_bs = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h1"]["b1"])[:, np.newaxis],
            np.array(in_file["TARGETS"]["h1"]["b2"])[:, np.newaxis],
        ),
        axis=-1,
    )
    h2_bs = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h2"]["b1"])[:, np.newaxis],
            np.array(in_file["TARGETS"]["h2"]["b2"])[:, np.newaxis],
        ),
        axis=-1,
    )
    h3_bs = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h3"]["b1"])[:, np.newaxis],
            np.array(in_file["TARGETS"]["h3"]["b2"])[:, np.newaxis],
        ),
        axis=-1,
    )

    # chi2 on fjets to find Higgs
    
    num_events = len(fj_pt) 
    bh1_b_pred = np.ones(shape=(num_events, 1), dtype=int)
    bh2_b_pred = np.ones(shape=(num_events, 1), dtype=int)*2
    bh3_b_pred = np.ones(shape=(num_events, 1), dtype=int)*3

    bh1_b = np.array(in_file["TARGETS"]["bh1"]["bb"])
    bh2_b = np.array(in_file["TARGETS"]["bh2"]["bb"])
    bh3_b = np.array(in_file["TARGETS"]["bh3"]["bb"])

    targets = [h1_bs, h2_bs, h3_bs, bh1_b, bh2_b, bh3_b]

    masks = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h1"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["h2"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["h3"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["bh1"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["bh2"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["bh3"]["mask"])[np.newaxis, :]
        ),
        axis=0,
    )

    predictions = [
        JET_ASSIGNMENTS[nj][chi2_argmin][:, 0, :],
        JET_ASSIGNMENTS[nj][chi2_argmin][:, 1, :],
        JET_ASSIGNMENTS[nj][chi2_argmin][:, 2, :],
        bh1_b_pred,
        bh2_b_pred,
        bh3_b_pred,
    ]

    num_vectors = np.sum(mask, axis=-1).to_numpy() # number of unique objects in every event
    lines = 2 # how many lines are generated in the table
    results, jet_limits, clusters = evaluate_predictions(predictions, num_vectors, targets, masks, event_file, lines)
    display_table(results, jet_limits, clusters)
