# Goal
The fraction of events in signal-region bins 1, 2 and 3 that have Jet 2 TXbb < 0.3.

```python
# Bin 1: H2TXbb and the BDT score are both above their first (index 0)
# working points.
mask_bin1 = (bdt_events["H2TXbb"] > args.txbb_wps[0]) & (
    bdt_events[bdt_score] > args.bdt_wps[0]
)

# Bin 2: both quantities above their index-1 working points, excluding events
# already selected by bin 1, the corner mask, or the VBF mask.
# `mask_corner` and `mask_vbf` are defined outside this excerpt.
mask_bin2 = (
    (bdt_events["H2TXbb"] > args.txbb_wps[1])
    & (bdt_events[bdt_score] > args.bdt_wps[1])
    & ~(mask_bin1)
    & ~(mask_corner)
    & ~(mask_vbf)
)


# Bin 3: same H2TXbb threshold as bin 2 but the BDT threshold bdt_wps[2],
# excluding events already selected by bin 1, bin 2, or the VBF mask.
mask_bin3 = (
    (bdt_events["H2TXbb"] > args.txbb_wps[1])
    & (bdt_events[bdt_score] > args.bdt_wps[2])
    & ~(mask_bin1)
    & ~(mask_bin2)
    & ~(mask_vbf)
)
```
In this case
```python
# H2TXbb working points: index 0 is used for bin 1, index 1 for bins 2 and 3.
args.txbb_wps = [0.9375, 0.7475]
# BDT-score working points: indices 0, 1, 2 are used for bins 1, 2, 3.
args.bdt_wps = [0.9075, 0.6375, 0.03]
```

In [3]:
from pathlib import Path
import pandas as pd

An explicit way:
```python
# variables used in `process.sh`
txbb_wps = [0.9375, 0.7475]  # H2TXbb thresholds: [bin 1, bins 2 and 3]
bdt_wps = [0.9075, 0.6375, 0.03]  # BDT-score thresholds: [bin 1, bin 2, bin 3]
vbf = True  # --vbf
vbf_priority = False  # --no-vbf-priority
vbf_txbb_wp = 0.775  # H2TXbb threshold of the VBF region
vbf_bdt_wp = 0.9825  # VBF BDT-score threshold of the VBF region

# bins, as defined in PostProcess.py
# bin 1: H2TXbb and BDT score both above their first working points
mask_bin1 = (df["H2TXbb"] > txbb_wps[0]) & (
    df["bdt_score"] > bdt_wps[0]
)

# VBF region (needed before bin 2, which vetoes it)
if vbf:
    mask_vbf = (df["bdt_score_vbf"] > vbf_bdt_wp) & (
        df["H2TXbb"] > vbf_txbb_wp
    )
else:
    # no VBF region requested: an all-False mask, so the vetoes below are no-ops
    # NOTE(review): this branch needs numpy available as `np`; no such import
    # is visible in the surrounding cells -- confirm before setting vbf = False
    mask_vbf = np.zeros(len(df), dtype=bool)

# resolve the overlap between bin 1 and the VBF region
if vbf_priority:
    # prioritize VBF region i.e. veto events in bin1 that pass the VBF selection
    mask_bin1 = mask_bin1 & ~(mask_vbf)
else:
    # prioritize bin 1 i.e. veto events in VBF region that pass the bin 1 selection
    mask_vbf = mask_vbf & ~(mask_bin1)
# corner: events below the bin 1 thresholds in both H2TXbb and BDT score;
# these are vetoed from bin 2
mask_corner = (df["H2TXbb"] < txbb_wps[0]) & (
    df["bdt_score"] < bdt_wps[0]
)
# bin 2: above the second working points, minus bin 1, the corner and VBF
mask_bin2 = (
    (df["H2TXbb"] > txbb_wps[1])
    & (df["bdt_score"] > bdt_wps[1])
    & ~(mask_bin1)
    & ~(mask_corner)
    & ~(mask_vbf)
)

# bin 3: same H2TXbb threshold as bin 2 with the loosest BDT threshold,
# minus bin 1, bin 2 and VBF
mask_bin3 = (
    (df["H2TXbb"] > txbb_wps[1])
    & (df["bdt_score"] > bdt_wps[2])
    & ~(mask_bin1)
    & ~(mask_bin2)
    & ~(mask_vbf)
)
```
However, the category is already stored in the dataframe (the `Category` column), so the bins can be selected directly from it.


In [57]:
# Eras whose processed-event files are scanned, and the upper cut applied to
# the Jet 2 TXbb score when counting events per bin.
eras = ["2022", "2022EE", "2023BPix", "2023"]
TXbb_th = 0.3

# Selection settings, recorded alongside the results for bookkeeping.
txbb_wps = [0.9375, 0.7475]
bdt_wps = [0.9075, 0.6375, 0.03]
vbf = True  # --vbf
vbf_priority = False  # --no-vbf-priority
vbf_txbb_wp = 0.775
vbf_bdt_wp = 0.9825

args = {
    "txbb_wps": txbb_wps,
    "bdt_wps": bdt_wps,
    "vbf": vbf,
    "vbf_priority": vbf_priority,
    "vbf_txbb_wp": vbf_txbb_wp,
    "vbf_bdt_wp": vbf_bdt_wp,
}

# Top-level result container: metadata first, per-era entries are added later.
era_stats = {"TXbb_th": TXbb_th, "args": args}


# For every era, load the processed events and, for each sample, count how many
# events in categories 1-3 have ak8FatJet2TXbb below TXbb_th.
# Results are stored as era_stats[era][sample] with the keys "events",
# "events_by_bin", "events_pass_by_bin" and "fraction_pass_by_bin"
# (the last three keyed by bin number 1..3).
for era in eras:
    path = Path(f"events/processed_events_{era}.pkl")
    if not path.exists():
        print(f"File {path} not found")
        continue

    # NOTE(security): unpickling can execute arbitrary code; only load files
    # produced by a trusted processing step.
    df_dict = pd.read_pickle(path)

    signal_stats = {}
    for signal_type, df in df_dict.items():
        total_events = len(df)
        print(f"Total events for {signal_type} in {era}: {total_events}")

        stats = {
            "events": total_events,
            "events_by_bin": {},
            "events_pass_by_bin": {},
            "fraction_pass_by_bin": {},
        }

        for bin_number in (1, 2, 3):
            df_bin = df[df["Category"] == bin_number]
            count = len(df_bin)
            pass_count = int((df_bin["ak8FatJet2TXbb"] < TXbb_th).sum())

            if count != 0:
                fraction = pass_count / count
                percent_text = f"{fraction*100:.2f}%"
            else:
                # Empty bin: keep the -1 sentinel in the stored result, but do
                # not print it as a misleading "-100.00%".
                fraction = -1
                percent_text = "n/a"

            stats["events_by_bin"][bin_number] = int(count)
            stats["events_pass_by_bin"][bin_number] = pass_count
            stats["fraction_pass_by_bin"][bin_number] = float(fraction)
            print(f"Events in bin {bin_number}: {pass_count} / {count} ({percent_text})")

        signal_stats[signal_type] = stats

    era_stats[era] = signal_stats
    

Total events for data in 2022: 69041
Events in bin 1: 0 / 7 (0.00%)
Events in bin 2: 1 / 55 (1.82%)
Events in bin 3: 14 / 586 (2.39%)
Total events for ttbar in 2022: 144348
Events in bin 1: 0 / 10 (0.00%)
Events in bin 2: 3 / 329 (0.91%)
Events in bin 3: 95 / 4617 (2.06%)
Total events for gghtobb in 2022: 72599
Events in bin 1: 2 / 54 (3.70%)
Events in bin 2: 2 / 229 (0.87%)
Events in bin 3: 5 / 369 (1.36%)
Total events for vbfhtobb in 2022: 455
Events in bin 1: 0 / 0 (-100.00%)
Events in bin 2: 0 / 0 (-100.00%)
Events in bin 3: 0 / 0 (-100.00%)
Total events for vhtobb in 2022: 49046
Events in bin 1: 16 / 921 (1.74%)
Events in bin 2: 18 / 1540 (1.17%)
Events in bin 3: 37 / 4138 (0.89%)
Total events for novhhtobb in 2022: 73054
Events in bin 1: 2 / 54 (3.70%)
Events in bin 2: 2 / 229 (0.87%)
Events in bin 3: 5 / 369 (1.36%)
Total events for tthtobb in 2022: 10754
Events in bin 1: 1 / 18 (5.56%)
Events in bin 2: 18 / 219 (8.22%)
Events in bin 3: 22 / 463 (4.75%)
Total events for diboson 

In [58]:
import json

# Persist the collected statistics. JSON object keys are always strings, so
# the integer bin numbers used as dict keys are written out as "1", "2", "3".
serialized_stats = json.dumps(era_stats)
with open("counts_jet2.json", "w") as out_file:
    out_file.write(serialized_stats)