In [4]:
# # PLT Analysis Notebook
# 
# Este notebook divide el proceso en dos partes:
# 1. Generación del DataFrame `final_df` (similar al script original)
# 2. Debugging de la parte posterior del procesamiento

# %% [markdown]
# ## 1. Configuración inicial y dependencias

In [12]:
import json
from pathlib import Path
from typing import Dict, Tuple, List
import pandas as pd
import numpy as np
from numba import njit, prange
import argparse
import sys
import os
import argparse

In [15]:
import json
import numpy as np
from poggers.io import read_fill
from poggers.models import sub_nl
from poggers._utils import get_scan_timestamps
from typing import Any
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import os
from itertools import combinations
from pathlib import Path
#from model.preprocessor import DifferencePreprocessor
#from sklearn.ensemble import IsolationForest
#from sklearn.preprocessing import StandardScaler
from poggers.options import PoggerOptions
from adtk.detector import LevelShiftAD, PersistAD
#from adtk.data import validate_series
from sklearn.linear_model import HuberRegressor
from scipy.stats import linregress
import mplhep as hep
import warnings

ModuleNotFoundError: No module named 'adtk'

In [13]:
# Importaciones específicas del proyecto
sys.path.insert(0, os.path.abspath('.'))
from model.figure_of_merit import Processor
from model.preprocessor import DifferencePreprocessor
from poggers.io import read_fill
from poggers.iterator import CentralIterator
from poggers.processor.mu_processor import MuProcessorExtension, MuProcessor
from poggers.models import sub_nl
from poggers.runner import runner

ModuleNotFoundError: No module named 'adtk'

In [None]:
# ## 2. Funciones necesarias (copiadas del script original)

In [None]:
@njit(parallel=True)
def numba_process_batch(data: np.ndarray, nbx: int, bxmask: np.ndarray) -> np.ndarray:
    """Reduce every row of ``data`` to one masked sum of ``-log(value / 2**14)``.

    Parameters
    ----------
    data:
        2-D array; each row is processed independently (rows are distributed
        over threads with ``prange``).
    nbx:
        Accepted for interface compatibility; this function does not read it.
    bxmask:
        Weights multiplied element-wise with each transformed row before the
        sum, so it must be broadcastable against one row of ``data``.

    Returns
    -------
    np.ndarray
        1-D array with one summed value per input row.
    """
    # Values are capped at 2**14 before normalising.
    # NOTE(review): 2**14 is presumably the full-scale count of the raw data -- confirm.
    full_scale = 2**14
    row_count = data.shape[0]
    result = np.empty(row_count)

    for row in prange(row_count):
        capped = np.minimum(data[row], full_scale)
        neg_log = -np.log(capped / full_scale)

        # log(0) gives inf and log(<0) gives nan; both contribute 0 to the sum.
        invalid = np.isnan(neg_log) | np.isinf(neg_log)
        cleaned = np.where(invalid, 0.0, neg_log)

        result[row] = np.sum(cleaned * bxmask)

    return result

class PLTAggExtension(MuProcessorExtension):
    """Processor extension that aggregates raw per-channel batches into one table.

    Each batch is reduced to a single ``avgraw`` value per row, the batches are
    pivoted into one column per channel, and the per-channel corrections given
    at construction time are applied.
    """

    def __init__(self, svs: Dict[int, float], effs: Dict[int, float], lins: Dict[int, float]):
        """Store the per-channel constants, each keyed by channel id.

        Args:
            svs: Per-channel calibration constants used as a divisor in
                ``_apply_svs`` (presumably visible cross-sections -- confirm).
            effs: Per-channel efficiency factors, also used as a divisor in
                ``_apply_svs``.
            lins: Per-channel coefficients handed to ``sub_nl`` in ``_apply_lin``.
        """
        self.svs = svs
        self.effs = effs
        self.lins = lins

    def process_batch(self, batch: pd.DataFrame, nbx: int, bxmask: np.ndarray) -> pd.DataFrame:
        """Compute ``avgraw`` for every row of ``batch`` and keep the bookkeeping columns.

        The ``data`` column is stacked into a 2-D array and reduced row by row
        with ``numba_process_batch``. The result is written into ``batch`` as a
        new ``avgraw`` column (the input frame is modified in place).

        Returns:
            The columns ``lsnum``, ``nbnum``, ``timestampsec``, ``channelid``
            and ``avgraw`` of ``batch``.
        """
        data = np.stack(batch["data"])
        batch["avgraw"] = numba_process_batch(data, nbx, bxmask)
        return batch[["lsnum", "nbnum", "timestampsec", "channelid", "avgraw"]]

    def build_dataframe(self, buffer: List[pd.DataFrame], nbx: int) -> pd.DataFrame:
        """Merge processed batches into one frame with a column per channel.

        Args:
            buffer: Frames as returned by ``process_batch``.
            nbx: Forwarded to ``sub_nl`` through ``_apply_lin``.

        Returns:
            A frame with columns ``lsnum``, ``nbnum``, ``time`` followed by one
            corrected column per channel id.
        """
        df = pd.concat(buffer).rename(columns={
                "timestampsec": "time",
                "channelid": "chid",
        }).pivot_table(
            index=["lsnum", "nbnum"], values=["avgraw", "time"], columns=["chid"]
        )
        # One timestamp per (lsnum, nbnum): the mean over all channel columns.
        # The index is reset so it lines up with `df` after its own reset below.
        avg_time = df["time"].mean(axis=1).reset_index(drop=True)
        df = df.drop(columns=["time"], level=0)
        # Only the "avgraw" level is left; flatten so columns are bare channel ids.
        df.columns = df.columns.droplevel(0).rename(None)
        df = df.reset_index()
        df.insert(2, "time", avg_time)
        df = self._apply_lin(df, nbx)
        df = self._apply_svs(df)
        return df

    def _apply_lin(self, df: pd.DataFrame, nbx: int) -> pd.DataFrame:
        """Pass the channel columns listed in ``self.lins`` through ``sub_nl``.

        The columns are transposed so that channels run along the first axis,
        matching the column vector of coefficients, and transposed back after.
        """
        channels = list(self.lins.keys())
        lins = np.array(list(self.lins.values()))
        df[channels] = sub_nl(df[channels].T, lins[:, np.newaxis], nbx).T
        return df

    def _apply_svs(self, df: pd.DataFrame) -> pd.DataFrame:
        """Scale each channel column by ``11245.5 / (svs * effs)``.

        ``self.svs`` and ``self.effs`` are independent dictionaries, so the
        efficiency for each channel is looked up by channel id rather than by
        position; otherwise a different key order (or key set) would silently
        pair a channel with another channel's efficiency.

        Raises:
            ValueError: If a channel present in ``self.svs`` has no entry in
                ``self.effs``.
        """
        channels = list(self.svs.keys())
        missing = [ch for ch in channels if ch not in self.effs]
        if missing:
            raise ValueError(
                f"No efficiency correction available for channels: {missing}"
            )
        svs = np.array([self.svs[ch] for ch in channels])
        effs = np.array([self.effs[ch] for ch in channels])
        # NOTE(review): 11245.5 is presumably the LHC orbit frequency in Hz -- confirm.
        df[channels] = df[channels] * 11245.5 / (svs * effs)
        return df

def load_corrections(fill: int, path: Path) -> Tuple[Dict[int, float], Dict[int, float]]:
    """Read the efficiency and linearity corrections of one fill from a JSON file.

    The file maps ``str(fill)`` to an object with the tables ``"eff"``,
    ``"lin"`` and ``"laf"``, each keyed by channel id (as a string). The
    channel list is taken from the ``"eff"`` table.

    A channel is dropped when its efficiency or linearity value is NaN, or
    when its ``"laf"`` entry compares equal to ``False``.

    Args:
        fill: Fill number used to select the entry in the file.
        path: Location of the JSON corrections file.

    Returns:
        ``(effs, lins)``: two dictionaries mapping the surviving channel ids
        (Python ints) to their efficiency and linearity values.
    """
    with open(path, "r") as handle:
        per_fill: Dict[str, Dict[str, float]] = json.load(handle)[str(fill)]

    channel_ids = np.array([int(key) for key in per_fill["eff"].keys()])

    def column(table: str) -> np.ndarray:
        # Values of one table, ordered like `channel_ids`.
        return np.array([per_fill[table][str(ch)] for ch in channel_ids])

    eff_values = column("eff")
    lin_values = column("lin")
    laf_flags = column("laf")

    # Element-wise comparison on purpose: `laf_flags` is an array, not a scalar.
    rejected = np.isnan(eff_values) | np.isnan(lin_values) | (laf_flags == False)  # noqa: E712
    keep = ~rejected

    kept_ids = channel_ids[keep].tolist()
    eff_by_channel = dict(zip(kept_ids, eff_values[keep]))
    lin_by_channel = dict(zip(kept_ids, lin_values[keep]))
    return eff_by_channel, lin_by_channel

def load_calibrations(path: Path) -> Dict[int, float]:
    """Read per-channel calibration constants from a JSON file.

    The file is a flat object mapping channel ids (as strings) to numbers.
    Channels whose value is NaN or ``null`` are left out of the result.

    Args:
        path: Location of the JSON calibration file.

    Returns:
        Dictionary mapping channel id (plain Python ``int``, matching what
        ``load_corrections`` returns) to its calibration constant.
    """
    with open(path, "r") as f:
        calibrations: Dict[str, float] = json.load(f)

    # Iterate the file's own keys so that ids such as "01" do not fail a
    # str(int(key)) round trip.
    keys = list(calibrations)
    channels = np.array([int(key) for key in keys], dtype=int)
    # dtype=float turns a JSON null (None) into NaN instead of producing an
    # object array on which np.isnan would raise TypeError.
    sigmas = np.array([calibrations[key] for key in keys], dtype=float)

    good = ~np.isnan(sigmas)
    # .tolist() yields Python ints rather than np.int64 keys.
    return dict(zip(channels[good].tolist(), sigmas[good]))