In [16]:
import itertools
import logging
import os
from pathlib import Path

import awkward as ak
import click
import h5py
import numba as nb
import numpy as np
import vector

from src.data.cms.convert_to_h5 import MIN_JETS, N_JETS, N_FJETS

# Register vector's awkward-array behaviors; the "Momentum4D" records built below
# rely on them for methods such as .deltaR and .mass.
vector.register_awkward()

# Emit log records at INFO level and above.
logging.basicConfig(level=logging.INFO)

In [2]:
# Input locations. Both can be overridden through environment variables so the
# notebook is not tied to one machine's directory layout; the defaults are the
# values that were previously hard-coded.
pred_file = os.environ.get("HHH_PRED_FILE", "")
test_file = os.environ.get(
    "HHH_TEST_FILE", "/Users/billyli/UCSD/hhh/data/delphes/v2/hhh_test.h5"
)

In [3]:
# Target mass used by the chi2 pairing (presumably in GeV — confirm).
HIGGS_MASS = 125.0

# Lookup table: number of jets -> array of candidate assignments. Each
# assignment is a triplet of jet-index pairs whose indices together cover
# exactly MIN_JETS distinct jets (i.e. no jet is shared between pairs when
# MIN_JETS equals 6 — confirm against the imported constant).
JET_ASSIGNMENTS = {}
for nj in range(MIN_JETS, N_JETS + 1):
    pairs = list(itertools.combinations(range(nj), 2))
    JET_ASSIGNMENTS[nj] = np.array(
        [
            triplet
            for triplet in itertools.combinations(pairs, 3)
            if len({jet for pair in triplet for jet in pair}) == MIN_JETS
        ]
    )

# Fat-jet counterpart of the table above; nothing fills it in this cell.
FJET_ASSIGNMENTS = {}

In [5]:
# Open the test file explicitly read-only so it can never be modified or created
# by accident, whatever the installed h5py's default mode is. The handle is left
# open on purpose: later cells read from it interactively.
in_file = h5py.File(test_file, "r")

In [6]:
# List the dataset names available under INPUTS/BoostedJets.
in_file['INPUTS']['BoostedJets'].keys()

<KeysViewHDF5 ['MASK', 'fj_charge', 'fj_chargedenergyfrac', 'fj_cosphi', 'fj_ehadovereem', 'fj_eta', 'fj_mass', 'fj_ncharged', 'fj_neutralenergyfrac', 'fj_nneutral', 'fj_phi', 'fj_pt', 'fj_sdmass', 'fj_sinphi', 'fj_tau21', 'fj_tau32']>

In [7]:
# List the dataset names available under INPUTS/Jets.
in_file['INPUTS']['Jets'].keys()

<KeysViewHDF5 ['MASK', 'btag', 'cosphi', 'eta', 'flavor', 'mass', 'matchedfj', 'phi', 'pt', 'sinphi']>

In [8]:
# a function that loads jets from hhh_test.h5
def load_jets(in_file):
    """Read the jet collection from an open HDF5 file into one awkward array.

    Parameters
    ----------
    in_file
        Mapping-like handle (e.g. an ``h5py.File``) exposing
        ``in_file["INPUTS"]["Jets"]`` with the datasets ``pt``, ``eta``,
        ``phi``, ``btag``, ``mass`` and ``MASK``.

    Returns
    -------
    ak.Array
        Records named ``"Momentum4D"`` with the fields ``pt``, ``eta``,
        ``phi``, ``btag``, ``mass`` and ``mask``.
    """
    jet_group = in_file["INPUTS"]["Jets"]

    # record field name -> dataset name inside the group
    field_to_dataset = {
        "pt": "pt",
        "eta": "eta",
        "phi": "phi",
        "btag": "btag",
        "mass": "mass",
        "mask": "MASK",
    }
    columns = {
        field: ak.Array(jet_group[dataset])
        for field, dataset in field_to_dataset.items()
    }

    return ak.zip(columns, with_name="Momentum4D")

In [9]:
# a function that loads fat jets from hhh_test.h5
def load_fjets(in_file):
    """Read the fat-jet collection from an open HDF5 file into one awkward array.

    Parameters
    ----------
    in_file
        Mapping-like handle (e.g. an ``h5py.File``) exposing
        ``in_file["INPUTS"]["BoostedJets"]`` with the datasets ``fj_pt``,
        ``fj_eta``, ``fj_phi``, ``fj_mass`` and ``MASK``.

    Returns
    -------
    ak.Array
        Records named ``"Momentum4D"`` with the fields ``pt``, ``eta``,
        ``phi``, ``mass`` and ``mask``.
    """
    fjet_group = in_file["INPUTS"]["BoostedJets"]

    # record field name -> dataset name inside the group
    field_to_dataset = {
        "pt": "fj_pt",
        "eta": "fj_eta",
        "phi": "fj_phi",
        "mass": "fj_mass",
        "mask": "MASK",
    }
    columns = {
        field: ak.Array(fjet_group[dataset])
        for field, dataset in field_to_dataset.items()
    }

    return ak.zip(columns, with_name="Momentum4D")

In [11]:
# Selection cuts for fat-jet Higgs candidates, named so they can be tuned in
# one place (units presumably GeV — confirm against the input file).
FJET_MASS_MIN = 110
FJET_MASS_MAX = 140
FJET_PT_MIN = 300

fjs = load_fjets(in_file)
fj_mask = fjs["mask"]

# A fat jet is selected when it lies in the mass window, passes the pT
# threshold and is flagged by the file's MASK dataset.
fjmass_cond = (fjs["mass"] > FJET_MASS_MIN) & (fjs["mass"] < FJET_MASS_MAX)
fjpt_cond = fjs["pt"] > FJET_PT_MIN
fj_cond = fjmass_cond & fjpt_cond & fj_mask
fjs_selected = fjs[fj_cond]

# Positions of the selected fat jets within each event's original fat-jet list.
fj_idx = ak.local_index(fjs)
bh_fj_idx = fj_idx[fj_cond]

In [24]:
# Load the jets and record each jet's position within its event, so the
# original indices survive later filtering.
js = load_jets(in_file)
js_idx = ak.local_index(js)

In [13]:
# Peek at the first jet record of the first event.
js[0][0]

In [14]:
# Try out the vector deltaR method on the loaded records.
# NOTE(review): this compares the selected fat jets of event 8 with a jet from
# event 0 — presumably only a quick API check, not a physics quantity.
fjs_selected[8].deltaR(js[0][0])

In [18]:
# Maximum deltaR between a jet and a fat jet for the two to count as overlapping.
FJET_DR = 0.8


@nb.njit
def match_fjet_to_jet(fjets, jets, builder):
    """Record, for every jet, the index of a fat jet it overlaps with.

    For each event one list is appended to ``builder`` holding one integer per
    jet: the position (within that event's ``fjets`` entry) of a fat jet lying
    closer than ``FJET_DR`` in deltaR, or ``-1`` when no fat jet is that close.
    When several fat jets qualify, the last one in iteration order is kept.

    Parameters
    ----------
    fjets, jets
        Per-event collections iterated in lockstep; their elements must
        support ``jet.deltaR(fjet)``.
    builder
        Object providing ``begin_list``, ``append`` and ``end_list``
        (an ``ak.ArrayBuilder`` in this notebook).

    Returns
    -------
    The same ``builder``, after it has been filled.
    """
    for event_fjets, event_jets in zip(fjets, jets):
        builder.begin_list()
        for jet in event_jets:
            matched = -1
            # no early exit: a later qualifying fat jet overrides an earlier one
            for fjet_pos, fjet in enumerate(event_fjets):
                if jet.deltaR(fjet) < FJET_DR:
                    matched = fjet_pos
            builder.append(matched)
        builder.end_list()

    return builder

In [27]:
# Per jet: index of the selected fat jet it overlaps with, or -1 when there is none.
matched_fj_idx = match_fjet_to_jet(fjs_selected, js, ak.ArrayBuilder()).snapshot()

# Keep the jets, and their original in-event positions, that overlap no selected fat jet.
no_fj_overlap = matched_fj_idx == -1
js_unoverlapped = js[no_fj_overlap]
idx_js_unoverlapped = js_idx[no_fj_overlap]

In [29]:
# NOTE(review): this bare expression is not the last statement of the cell, so
# the notebook does not display it.
js_unoverlapped
# Count the entries along the innermost axis, i.e. per event.
ak.count(js_unoverlapped, axis=-1)

In [None]:
# NOTE(review): unfinished cell.
#  - It reuses the name match_fjet_to_jet, so running it replaces the earlier
#    (fjets, jets, builder) definition of that function.
#  - The body iterates over `fjets` and `jets`, which are not among this
#    signature's parameters (js, j_counts, jet_assignments, builder), and the
#    parameters `js`, `j_counts` and `jet_assignments` are never used.
#  - Judging by the parameter names this was presumably meant to become the
#    jet-to-Higgs assignment step — confirm intent, then rename and rewrite.
@nb.njit
def match_fjet_to_jet(js, j_counts, jet_assignments, builder):
    for fjets_event, jets_event in zip(fjets, jets):
        builder.begin_list()
        for i, jet in enumerate(jets_event):
            # -1 marks "no fat jet within FJET_DR of this jet"
            match_idx = -1
            for j, fjet in enumerate(fjets_event):
                if jet.deltaR(fjet) < FJET_DR:
                    match_idx = j
            builder.append(match_idx)
        builder.end_list()

    return builder

In [17]:

def main(pred_file, test_file):
    """Skeleton of the baseline pipeline; the processing steps are still TODO.

    Parameters
    ----------
    pred_file
        Currently unused.
    test_file
        Path of the HDF5 test file to read.

    Returns
    -------
    None
    """
    # Open read-only inside a context manager so the handle is closed even if
    # loading raises.
    with h5py.File(test_file, "r") as in_file:
        jets = load_jets(in_file)
        fjets = load_fjets(in_file)

        # select fjets based on pT and mass

        # find ak4jets that matched to selected ak8jets (dR check)

        # remove overlapped ak4jets

        # assign ak4 jets to the remaining higgs by chi2

        # save all assignment to the h5file

    return
    
    

   


# Run the (still skeletal) pipeline on the configured paths.
main(pred_file, test_file)


## Code segments waiting to be integrated

In [None]:

    
    # NOTE(review): this cell is an indented fragment with no enclosing `def`, so
    # it cannot run as-is. It also uses names with no module-level definition in
    # the visible notebook: `jets`, `fj_pt`, `mask`, `event_file`,
    # `evaluate_predictions` and `display_table`.

    # just consider top-6 jets
    nj = 6
    # Sum the two jets of every candidate pair and take the mass of the sum.
    mjj = (jets[:, JET_ASSIGNMENTS[nj][:, :, 0]] + jets[:, JET_ASSIGNMENTS[nj][:, :, 1]]).mass
    # chi2 of an assignment: squared deviations from HIGGS_MASS summed over its pairs.
    chi2 = ak.sum(np.square(mjj - HIGGS_MASS), axis=-1)
    # Index of the assignment with the smallest chi2 in each event.
    chi2_argmin = ak.argmin(chi2, axis=-1)

    # Target b-jet entries of each Higgs, with b1 and b2 joined along a new last axis.
    h1_bs = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h1"]["b1"])[:, np.newaxis],
            np.array(in_file["TARGETS"]["h1"]["b2"])[:, np.newaxis],
        ),
        axis=-1,
    )
    h2_bs = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h2"]["b1"])[:, np.newaxis],
            np.array(in_file["TARGETS"]["h2"]["b2"])[:, np.newaxis],
        ),
        axis=-1,
    )
    h3_bs = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h3"]["b1"])[:, np.newaxis],
            np.array(in_file["TARGETS"]["h3"]["b2"])[:, np.newaxis],
        ),
        axis=-1,
    )

    # chi2 on fjets to find Higgs
    
    num_events = len(fj_pt) 
    # Constant per-event predictions (1, 2 and 3) for the three boosted-Higgs
    # targets — presumably placeholders until the fat-jet chi2 is implemented.
    bh1_b_pred = np.ones(shape=(num_events, 1), dtype=int)
    bh2_b_pred = np.ones(shape=(num_events, 1), dtype=int)*2
    bh3_b_pred = np.ones(shape=(num_events, 1), dtype=int)*3

    bh1_b = np.array(in_file["TARGETS"]["bh1"]["bb"])
    bh2_b = np.array(in_file["TARGETS"]["bh2"]["bb"])
    bh3_b = np.array(in_file["TARGETS"]["bh3"]["bb"])

    # Same order as `predictions` below: h1, h2, h3, bh1, bh2, bh3.
    targets = [h1_bs, h2_bs, h3_bs, bh1_b, bh2_b, bh3_b]

    # Target masks stacked along a new leading axis, one row per target above.
    masks = np.concatenate(
        (
            np.array(in_file["TARGETS"]["h1"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["h2"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["h3"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["bh1"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["bh2"]["mask"])[np.newaxis, :],
            np.array(in_file["TARGETS"]["bh3"]["mask"])[np.newaxis, :]
        ),
        axis=0,
    )

    # For each event, the three jet pairs of the minimum-chi2 assignment,
    # followed by the constant boosted predictions.
    predictions = [
        JET_ASSIGNMENTS[nj][chi2_argmin][:, 0, :],
        JET_ASSIGNMENTS[nj][chi2_argmin][:, 1, :],
        JET_ASSIGNMENTS[nj][chi2_argmin][:, 2, :],
        bh1_b_pred,
        bh2_b_pred,
        bh3_b_pred,
    ]

    num_vectors = np.sum(mask, axis=-1).to_numpy() # number of unique objects in every event
    lines = 2 # how many lines are generated in the table
    results, jet_limits, clusters = evaluate_predictions(predictions, num_vectors, targets, masks, event_file, lines)
    display_table(results, jet_limits, clusters)
