In [None]:
import pandas as pd
from pyprojroot import here
import numpy as np
import joblib

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import AnchoredSizeLocator

import lime
import lime.lime_tabular

from colors import *

In [None]:
# Notebook-level settings.
# TARGET is used further down as the key into `c` when colouring the bars.
# NOTE(review): `origin` is not referenced in the visible cells — confirm it is still needed.
TARGET, origin = 'bed', 11


In [None]:
# Flags that are interpolated into the model file name when the model is loaded.
# NOTE(review): presumably "hyper-parameter optimisation" and "feature selection"
# switches (0 = off) — confirm against the training pipeline.
hpo_indicator, fs_indicator = 0, 0

In [None]:
# Build the artefact name from the two indicator flags and load the saved model
# from the project's processed-models folder.
# NOTE(review): joblib.load unpickles the file — only load artefacts you trust.
name = f'lgbm-{hpo_indicator}-{fs_indicator}'
path = here().joinpath(f'data/processed/models/{name}.pkl')
model = joblib.load(path)
model  # bare last expression so the notebook renders the loaded object

In [None]:
# Read the interim CSV located under the project root and use its 'Datetime'
# column as the row index.
data = pd.read_csv(here() / 'data/interim/melted.csv', index_col='Datetime')

In [None]:
# Feature matrix: every column of `data` except 'Crowding', restricted to rows
# that contain no missing values.
X = data.drop(columns='Crowding').dropna()

In [None]:
# LIME
# Tabular explainer built from the feature matrix X (classification mode,
# continuous features left undiscretized).
# - random_state pins LIME's perturbation sampling so that re-running the
#   notebook reproduces the same explanations.
# - verbose is off: the explainer is called once per sampled row further down,
#   and per-call diagnostics would flood the cell output.
explainer_lime = lime.lime_tabular.LimeTabularExplainer(
    X.values,
    feature_names=X.columns,
    verbose=False,
    mode='classification',
    discretize_continuous=False,
    random_state=42,
)


In [None]:

# Calculate LIME importance values: the mean absolute LIME weight of each
# feature over a random subset of the rows of X.
n_samples = min(500, len(X))  # explain at most 500 rows (all rows if X is smaller)
# Seeded generator so the same rows are drawn on every Restart & Run All.
sample_indices = np.random.default_rng(42).choice(len(X), size=n_samples, replace=False)

# Accumulate in a plain array addressed by column position: local_exp reports
# features by position, so no per-element label lookup on a Series is needed.
abs_weight_sum = np.zeros(len(X.columns))
for idx in sample_indices:
    exp = explainer_lime.explain_instance(
        X.iloc[idx].values,
        model.predict_proba,
        num_features=len(X.columns)
    )
    # local_exp[1] holds (feature position, weight) pairs for class label 1
    for feature, importance in exp.local_exp[1]:
        abs_weight_sum[feature] += abs(importance)

# Normalize LIME importance values: average |weight| over the explained rows
lime_importance = pd.Series(abs_weight_sum / n_samples, index=X.columns)

In [None]:
# Keep the ten largest importance values, in ascending order (largest last).
top = lime_importance.sort_values().iloc[-10:]

In [None]:
# Horizontal bar chart of the selected LIME importances, written to disk as a PNG.
fig, ax = plt.subplots(figsize=(7,2.5))

top.plot.barh(width=.8, ax=ax, color=c[TARGET])

ax.grid(ls='--', lw=0.5)
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.set_xlabel('LIME value')

# Create the output folder on demand so saving does not fail on a fresh checkout,
# and save through the figure handle rather than pyplot's "current figure".
plot_path = here() / 'output/plots/lime.png'
plot_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(plot_path, dpi=300, bbox_inches='tight')