In [1]:
# %%
# Imports for the whole notebook: ML / data-handling libraries first, then the
# project-local helpers, then histogramming.
# NOTE(review): `optuna` is imported twice in this cell; the second import is redundant.
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.datasets import load_wine
import optuna
from optuna.samplers import TPESampler
import pickle
import pandas as pd 
import joblib
import os, sys 
# Project-local modules are located through absolute, user-specific paths added to
# sys.path. NOTE(review): these paths make the notebook non-portable; consider a
# configurable base directory.
sys.path.append('/global/cfs/projectdirs/atlas/hrzhao/HEP_Repo/QG_Calibration/BDT_EB4/LightGBM')
# NOTE(review): the wildcard import pulls every public name of the module into the
# notebook namespace, so later cells may depend on names that are not visible here.
# TODO: replace with explicit imports.
from LightGBM_BDT_train import *

sys.path.append('/global/cfs/projectdirs/atlas/hrzhao/HEP_Repo/QG_Calibration/NewWorkflow')
from core.utils import normalize_hist, safe_array_divide
# NOTE(review): `Plot_Parton_ForwardvsCentral` imported here is shadowed by a
# function of the same name defined in a later cell of this notebook.
from core.Calculate_SF import convert_hist2unumpy, safe_array_divide_unumpy, Plot_Parton_ForwardvsCentral


# Histogram objects (hist.Hist with weighted storage) and numpy for bin edges.
import hist
from hist import Hist
import numpy as np

In [3]:
# Imported explicitly so this cell does not depend on a wildcard import for `Path`.
from pathlib import Path

# Location of the pickled training sample (not read in the cells shown here).
sample_path = '/global/cfs/projectdirs/atlas/hrzhao/HEP_Repo/QG_Calibration/NewWorkflow/LightGBM/training_sample.pkl'
# Directory that holds the test sample and receives the plots.
output_path = './inspect_best/'

output_folder = Path(output_path)
# exist_ok=True already makes this a no-op when the directory is present,
# so no separate existence check is needed.
output_folder.mkdir(parents=True, exist_ok=True)

# Input variables of the BDT training.
training_vars = ['jet_pt', 'jet_eta', 'jet_nTracks', 'jet_trackWidth', 'jet_trackC1']
# pT bin edges in GeV; entry `idx` is used as the label of pT bin `idx` in the plotting loop.
label_pt_bin = [500, 600, 800, 1000, 1200, 1500, 2000]



In [4]:
# Read the pickled test sample back from the output folder.
# NOTE(review): joblib.load unpickles the file, so it must come from a trusted source.
sample_test = joblib.load(output_folder.joinpath('sample_test.pkl'))

In [8]:
# `unumpy` provides the nominal_values / std_devs accessors used by the plotting
# function below; `ufloat` is not used in the cells shown here.
from uncertainties import ufloat, unumpy
# Apply the ATLAS matplotlib style globally, with usetex=False as passed here.
import atlas_mpl_style as ampl
ampl.use_atlas_style(usetex=False)

# Bin edges per plotted variable, built from (lower edge, upper edge, number of edges).
HistBins = {
    var_name: np.linspace(lower, upper, n_edges)
    for var_name, (lower, upper, n_edges) in {
        'jet_pt': (500, 2000, 61),
        'jet_eta': (-2.5, 2.5, 51),
        'jet_nTracks': (0, 60, 61),
        'jet_trackWidth': (0, 0.4, 61),
        'jet_trackC1': (0, 0.4, 61),
        'jet_trackBDT': (-1.0, 1.0, 101),
        'GBDT_newScore': (-5.0, 5.0, 101),
    }.items()
}

# Axis / title label for each variable key. Raw strings are used so LaTeX
# backslashes (e.g. \eta) are never parsed as Python escape sequences.
Map_var_title = {
    "jet_pt": r"$p_{T}$",
    "jet_nTracks": r"$N_{trk}$",
    "jet_trackBDT": "old BDT",
    "jet_eta": r"$\eta$",
    "jet_trackC1": r"$C_{1}$",
    "jet_trackWidth": "W",
    "GBDT_newScore": "new BDT"
}


def Plot_Parton_ForwardvsCentral(pt, var, output_path, period, reweighting_var, reweighting_option, 
                                 p_Forward_Quark, p_Central_Quark, p_Forward_Gluon, p_Central_Gluon,
                                 n_vars=4):
    """Plot quark/gluon distributions in the forward and central regions and save the figure.

    The upper panel shows the four input distributions; the lower panel shows the
    forward/central ratio for quarks and for gluons. The figure is written to
    ``output_path / "MC_truth_Q_G_FvsC_{pt}_{var}_{reweighting_option}.jpg"`` and then closed.

    NOTE: this notebook-local definition shadows the function of the same name
    imported from ``core.Calculate_SF`` at the top of the notebook.

    Parameters
    ----------
    pt : value used in the title ("{pt} GeV") and in the output file name.
    var : key into ``HistBins`` (bin edges) and ``Map_var_title`` (axis label).
    output_path : str or pathlib.Path, directory for the image; created if missing.
    period, reweighting_var : accepted for interface compatibility; not used in the body.
    reweighting_option : only used as the last component of the output file name.
    p_Forward_Quark, p_Central_Quark, p_Forward_Gluon, p_Central_Gluon :
        arrays readable by ``unumpy.nominal_values`` / ``unumpy.std_devs``, with one
        entry per bin of ``HistBins[var]``.
    n_vars : number reported in the plot title ("w/ {n_vars} vars").
    """
    # Local imports keep the function independent of the order in which notebook
    # cells import these names.
    from pathlib import Path
    from matplotlib import pyplot as plt

    bin_edges = HistBins[var]
    bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, gridspec_kw={'height_ratios': [3, 1], 'hspace': 0})

    def _draw(ax, values, **style):
        # One step-style curve with error bars taken from the unumpy array.
        ax.errorbar(x=bin_centers, y=unumpy.nominal_values(values), yerr=unumpy.std_devs(values),
                    drawstyle='steps-mid', **style)

    try:
        # Upper panel: quarks in blue, gluons in red; central region dashed.
        _draw(ax0, p_Forward_Quark, color='blue', label='Forward Quark')
        _draw(ax0, p_Central_Quark, color='blue', label='Central Quark', linestyle='--')
        _draw(ax0, p_Forward_Gluon, color='red', label='Forward Gluon')
        _draw(ax0, p_Central_Gluon, color='red', label='Central Gluon', linestyle='--')
        ax0.set_xlim(bin_edges[0], bin_edges[-1])
        ax0.set_ylabel("Normalized")
        ax0.legend(loc='upper right')
        ampl.draw_atlas_label(0.1, 0.85, ax=ax0, energy="13 TeV", simulation=True)
        ax0.set_title(f"{pt} GeV: MC Q/G in Forward/Central region, " + rf"{Map_var_title[var]} w/ {n_vars} vars")

        # Lower panel: forward/central ratio per parton type.
        ratio_Quark = safe_array_divide_unumpy(numerator = p_Forward_Quark, denominator = p_Central_Quark)
        ratio_Gluon = safe_array_divide_unumpy(numerator = p_Forward_Gluon, denominator = p_Central_Gluon)
        _draw(ax1, ratio_Quark, color='blue', label='Quark')
        _draw(ax1, ratio_Gluon, color='red', label='Gluon')

        ax1.set_ylabel("Forward/Central")
        ax1.set_ylim(0.7, 1.3)
        ax1.hlines(y = 1, xmin = bin_edges[0], xmax = bin_edges[-1], color = 'black', linestyle = '--')
        ax1.set_xlabel(f"{Map_var_title[var]}")
        ax1.legend(loc='upper right')

        # Accept both str and Path; mkdir with exist_ok=True needs no prior existence check.
        output_dir = Path(output_path)
        output_dir.mkdir(parents = True, exist_ok = True)
        fig.savefig(output_dir / f"MC_truth_Q_G_FvsC_{pt}_{var}_{reweighting_option}.jpg")
    finally:
        # Close this specific figure (even on failure) so repeated calls do not accumulate figures.
        plt.close(fig)


In [9]:
from matplotlib import pyplot as plt


def _normalized_score_unumpy(jets, bin_edges):
    """Histogram the new BDT score of `jets`, normalise it and convert it to a unumpy array.

    `jets` is a DataFrame slice providing the 'newGBDT_score' and 'event_weight'
    columns; `bin_edges` supplies the number of bins and the axis range.
    """
    score_hist = Hist(hist.axis.Regular(bins=len(bin_edges)-1, start=bin_edges[0], stop=bin_edges[-1],
                                        overflow=True, underflow=True),
                      storage=hist.storage.Weight())
    score_hist.fill(jets['newGBDT_score'], weight=jets['event_weight'])
    return convert_hist2unumpy(normalize_hist(score_hist))


# Same edges the plotting function looks up for var="GBDT_newScore", so the
# histogram contents always line up with the plotted bin centres.
score_bin_edges = HistBins["GBDT_newScore"]

# NOTE(review): the plot title reports the default n_vars=4 of
# Plot_Parton_ForwardvsCentral while `training_vars` lists 5 variables — confirm
# which number is intended.
# One plot per pT bin: label_pt_bin holds the bin edges, hence len - 1 bins.
for idx in range(len(label_pt_bin) - 1):
    jets_in_pt_bin = sample_test.loc[sample_test['pt_idx']==idx]

    # Build the four (region, parton) distributions; is_forward: 1 forward / 0 central,
    # target: 0 quark / 1 gluon.
    distributions = {}
    for region, forward_flag in (("Forward", 1), ("Central", 0)):
        for parton, target_flag in (("Quark", 0), ("Gluon", 1)):
            selection = (jets_in_pt_bin['is_forward']==forward_flag) & (jets_in_pt_bin['target']==target_flag)
            distributions[f"p_{region}_{parton}"] = _normalized_score_unumpy(
                jets_in_pt_bin.loc[selection], score_bin_edges)

    Plot_Parton_ForwardvsCentral(pt=label_pt_bin[idx], var="GBDT_newScore", period="ADE", output_path=output_folder, 
                                 reweighting_var=None, reweighting_option=None,
                                 **distributions)
    