In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell is executed, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Extend the module search path two directories above the working directory.
# NOTE(review): presumably this is the repository root that provides
# data_loading, microfit and make_detsys -- confirm; the relative path only
# works when the kernel is started from this notebook's directory.
sys.path.append("../../")
import data_loading as dl

from microfit import run_plotter as rp
from microfit import histogram as hist

import make_detsys as detsys

In [3]:
from importlib import reload
import logging

# Configure the root logger for verbose output.
# ``force=True`` (Python >= 3.8) removes any handlers already attached to the
# root logger before installing the new configuration. It replaces the former
# ``reload(logging)`` trick, which created a brand-new root logger and left
# every logger created by already-imported modules attached to the old,
# unconfigured one -- so their DEBUG/INFO messages were silently dropped.
logging.basicConfig(level=logging.DEBUG, force=True)

In [4]:
# Run identifiers (as strings) handed to dl.load_runs and
# detsys.make_variations in the cells below.
RUN = ["1","2","3"]

In [None]:
def _load_bnb_runs(runs):
    """Load ``runs`` with the BNB configuration shared by both channels.

    Thin wrapper around ``dl.load_runs`` that pins every loading option in one
    place. Returns whatever ``dl.load_runs`` returns; the callers below unpack
    it into three values.
    """
    return dl.load_runs(
        runs,
        data="bnb",
        truth_filtered_sets=["nue", "drt"],
        loadpi0variables=True,
        loadshowervariables=True,
        loadrecoveryvars=True,
        loadsystematics=True,
        load_lee=True,
        blinded=False,
        enable_cache=True,
        numupresel=False,
        loadnumuvariables=False,
        use_bdt=True,
        load_numu_tki=False,
    )


# The same configuration is loaded twice so that the PI0 channel and the
# signal channel each receive their own result objects.
rundata_pi0, mc_weights_pi0, data_pot_pi0 = _load_bnb_runs(RUN)

rundata, mc_weights, data_pot = _load_bnb_runs(RUN)

Loading run 1


INFO:numexpr.utils:NumExpr defaulting to 8 threads.


Prior to truth filtering:
31454
After truth filtering:
30412
Loading run 2
Prior to truth filtering:
25108
After truth filtering:
24196
Loading run 3


In [None]:
# --- PI0 channel: detector variations, histogram generator and plot ---------
selection = "PI0"
preselection = "PI0"
# Arguments forwarded positionally to hist.Binning.from_config.
# NOTE(review): presumably (variable, number of bins, range, axis label) -- confirm.
binning_def = ("reco_e", 10, (0.15, 1.55), "neutrino reconstructed energy [GeV]")
binning = hist.Binning.from_config(*binning_def)
binning.label = "PI0"

# Detector-systematics variations for this selection and binning; the result
# is passed to the generator below through ``detvar_data_path``.
pi0_detvars = detsys.make_variations(
    RUN,
    "bnb",
    selection,
    preselection,
    binning,
    truth_filtered_sets=["nue","cc_pi0","nc_pi0"],
    make_plots=True,
    loadpi0variables=True,
    loadshowervariables=True,
    loadrecoveryvars=True,
    loadsystematics=False,
    enable_cache=True,
    numupresel=False,
    loadnumuvariables=False,
    use_bdt=True,
    load_numu_tki=False
)

# A fresh Binning is built from the same definition for the generator.
# NOTE(review): presumably so that anything make_variations may have changed
# on the first instance does not leak into the generator -- confirm.
binning = hist.Binning.from_config(*binning_def)
binning.label = "PI0"
pi0_generator = hist.RunHistGenerator(
    rundata_pi0,
    binning,
    data_pot=data_pot_pi0,
    selection=selection,
    preselection=preselection,
    sideband_generator=None,
    uncertainty_defaults=None,
    detvar_data_path=pi0_detvars,
)

# A single plotter instance. The previous version first built a plotter with an
# explicit title and then immediately replaced it with this default-constructed
# one, so the titled instance was never used for plotting.
plotter_pi0 = rp.RunHistPlotter(pi0_generator)
axes = plotter_pi0.plot(
    category_column="paper_category",
    include_multisim_errors=True,
    add_ext_error_floor=False,
    show_data_mc_ratio=True,
    show_chi_square=True,
    smooth_ext_histogram=False,
    add_precomputed_detsys=True
)

In [None]:
# --- ZP signal channel: detector variations, histogram generator and plot ---
# RUN comes from the configuration cell near the top of the notebook; it is
# deliberately not redefined here so that the detector variations always use
# the same runs as the data loaded into ``rundata``.
binning_def = ("reco_e", 10, (0.15, 1.55), r"Reconstructed Energy [GeV]")  # reproduce technote

# Use the ``hist`` module alias imported at the top of the notebook: the bare
# name ``Binning`` is only imported in a later cell, so referring to it here
# raised a NameError when running the notebook top to bottom on a fresh kernel.
zp_binning = hist.Binning.from_config(*binning_def)
#zp_binning.set_selection(preselection="ZP", selection="ZPBDT")
zp_binning.label = "ZPBDT"
selection = "ZPBDT"
preselection = "ZP"

# Detector-systematics variations for this selection and binning; the result
# is passed to the generator below through ``detvar_data_path``.
zp_detvars = detsys.make_variations(
    RUN,
    "bnb",
    selection,
    preselection,
    zp_binning,
    truth_filtered_sets=["nue","cc_pi0","nc_pi0"],
    make_plots=True,
    loadpi0variables=True,
    loadshowervariables=True,
    loadrecoveryvars=True,
    loadsystematics=False,
    enable_cache=True,
    numupresel=False,
    loadnumuvariables=False,
    use_bdt=True,
    load_numu_tki=False
)

signal_generator = hist.RunHistGenerator(
    rundata,
    zp_binning,
    data_pot=data_pot,
    selection=selection,
    preselection=preselection,
    uncertainty_defaults=None,
    detvar_data_path=zp_detvars
)

plotter = rp.RunHistPlotter(signal_generator)
axes = plotter.plot(
    category_column="paper_category",
    include_multisim_errors=True,
    add_ext_error_floor=False,
    show_data_mc_ratio=True,
    show_chi_square=True,
    smooth_ext_histogram=False,
    add_precomputed_detsys=True
)

In [None]:
# Combine the PI0 and signal generators into one multi-channel analysis and
# plot it.

from microfit.analysis import MultibandAnalysis
from microfit.histogram import Binning, RunHistGenerator, MultiChannelBinning
from microfit.run_plotter import RunHistPlotter

# Channel names use the labels assigned to the binnings earlier in this
# notebook ("PI0" for the constraint channel, "ZPBDT" for the signal channel).
# The previous signal channel name "NPL_MEDIUM_PID" does not correspond to any
# binning defined here.
# NOTE(review): assumes MultibandAnalysis matches channel names against the
# binning labels of the supplied generators -- confirm.
analysis = MultibandAnalysis(
    run_hist_generators=[pi0_generator, signal_generator],
    constraint_channels=["PI0"],
    signal_channels=["ZPBDT"],
)

# Keep the returned value under its own name instead of rebinding ``plotter``,
# which still refers to the signal-channel RunHistPlotter created earlier.
analysis_axes = RunHistPlotter(analysis).plot(
    include_multisim_errors=True,
    add_ext_error_floor=False,
    stacked=True,
    show_total=True,
    use_sideband=True,
    show_data_mc_ratio=True,
    show_chi_square=True,
    add_precomputed_detsys=True
)

In [None]:
# Pull three objects out of the PI0 generator: its "mc" histogram generator,
# the total prediction with multisim errors included, and the data histogram.
# NOTE(review): none of these three names is referenced again in the cells
# visible here -- confirm they are still needed.
sideband_generator_pi0 = pi0_generator.get_hist_generator(which="mc")
sideband_total_prediction_pi0 = pi0_generator.get_total_prediction(include_multisim_errors=True)
sideband_observed_hist_pi0 = pi0_generator.get_data_hist()

In [None]:
from importlib import reload
import logging

# Lower the verbosity to warnings for the remaining cells.
# ``force=True`` (Python >= 3.8) replaces the handlers already installed on the
# root logger. It supersedes the former ``reload(logging)`` trick, which created
# a new root logger and left loggers of already-imported modules detached from
# the new configuration.
logging.basicConfig(level=logging.WARN, force=True)

In [None]:
# Extract the "mc" histogram generator from each run-level generator; these are
# the inputs to the multiband covariance computed in the following cells.
pi0_hist_gen = pi0_generator.get_hist_generator(which="mc")
signal_hist_gen = signal_generator.get_hist_generator(which="mc")

In [None]:
ms_column = ""
hist_gens = [signal_hist_gen,sideband_hist_gen,pi0_hist_gen]
#hist_gens = [np_hist_gen, zp_hist_gen]

multiband_covariance = hist.HistogramGenerator.multiband_covariance(
    hist_gens,
    ms_column=ms_column
)

In [None]:
import numpy as np
# convert the covariance into a correlation matrix
with np.errstate(divide='ignore', invalid='ignore'):
    multiband_correlation = multiband_covariance / np.sqrt(np.outer(np.diag(multiband_covariance), np.diag(multiband_covariance)))
# replace NANs with 0s
multiband_correlation = np.nan_to_num(multiband_correlation)

In [None]:
multiband_correlation

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 6), constrained_layout=True)
# show the covariance matrix as a heatmap
X, Y = np.meshgrid(np.arange(multiband_correlation.shape[0] + 1), np.arange(multiband_correlation.shape[1] + 1))
p = ax.pcolormesh(X, Y, multiband_correlation.T, cmap="Spectral_r", shading="flat")
# colorbar
cbar = fig.colorbar(p, ax=ax)
cbar.set_label("correlation")
ax.set_title(f"Multiband Correlation: {ms_column}")
# turn off tick labels
ax.set_yticklabels([])
ax.set_xticklabels([])
# set tick marks at every bin
ax.set_xticks(np.arange(multiband_correlation.shape[0]) + 0.5, minor=False)
ax.set_yticks(np.arange(multiband_correlation.shape[1]) + 0.5, minor=False)
ax.tick_params(axis='both', which='both', direction="in")


# draw vertical and horizontal lines splitting the different histograms that went
# into the covariance matrix
pos = 0
hist_gen_labels = ["Signal", "NUMU","PI0"]
for hist_gen, label in zip(hist_gens, hist_gen_labels):
    pos += hist_gen.binning.n_bins
    ax.axvline(pos, color="k", linestyle="--")
    ax.axhline(pos, color="k", linestyle="--")
    ax.text(pos - hist_gen.binning.n_bins / 2, -1, label, ha="center", va="top", fontsize=12)
    ax.text(-1, pos - hist_gen.binning.n_bins / 2, label, ha="right", va="center", fontsize=12)

plt.savefig(f"plots/multiband_correlation_{ms_column}_numu_crt.pdf")