# 05 · Interpretability — weights, permutation, feature fixing

In [None]:
import numpy as np, pandas as pd, torch, glob
from src.dataset import split_dataframe, build_matrices, load_scaler
from src.model import MLP

# Load the processed dataset. Feature columns are those whose name starts
# with 'x'; the two regression targets are listed explicitly.
df = pd.read_csv('../data/processed/dataset.csv')
feature_cols = [c for c in df.columns if c.startswith('x')]
target_cols = ['H2_ppm','Propane_ppm']
# Only the third element returned by split_dataframe is used here
# (presumably the test split -- verify against src.dataset).
_, _, test_df = split_dataframe(df, split_col='split')
X_test, y_test = build_matrices(test_df, feature_cols, target_cols)

# Apply the previously saved scaler, then cast to float32 before handing the
# matrix to torch.from_numpy below.
scaler = load_scaler('../models/scaler.joblib')
X_test = scaler.transform(X_test).astype('float32')

# Fail with an explicit message instead of a bare IndexError when no
# checkpoint file is present.
ckpt_paths = sorted(glob.glob('../models/best_model_*.pt'))
if not ckpt_paths:
    raise FileNotFoundError(
        "No checkpoint matching '../models/best_model_*.pt' was found"
    )
# NOTE(review): this takes the lexicographically first match; confirm that is
# the intended checkpoint when several files exist.
ckpt_path = ckpt_paths[0]
# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# torch version / weights_only setting -- only load trusted checkpoints.
ckpt = torch.load(ckpt_path, map_location='cpu')
model = MLP(ckpt['in_dim'], ckpt['hidden'], ckpt['out_dim'])
model.load_state_dict(ckpt['state_dict'])
model.eval()

# Baseline error on the test matrix: mean squared error averaged over every
# sample and every target column.
with torch.no_grad():
    base_pred = model(torch.from_numpy(X_test)).numpy()
base_mse = ((base_pred - y_test)**2).mean()
print('Baseline MSE:', base_mse)

# Weight-based importance
# Each input feature is scored by its absolute first-layer weights, with
# every hidden unit weighted by the mean absolute weight leaving it in
# net[3]. torch.nn.Linear stores `weight` as (out_features, in_features), so
# W1 is (hidden, n_features) and W2 is (n_out, hidden).
# NOTE(review): assumes net[0] and net[3] are Linear layers and that net[3]
# consumes the activations of net[0] -- confirm against src.model.MLP.
W1 = model.net[0].weight.detach().numpy()
W2 = model.net[3].weight.detach().numpy()
# Contract over the hidden axis so the result has exactly one entry per input
# feature. Multiplying the two axis-0 reductions elementwise would pair
# feature j with hidden unit j (or fail to broadcast when the sizes differ).
hidden_weight = np.mean(np.abs(W2), axis=0)
imp = hidden_weight @ np.abs(W1)
wa = pd.Series(imp, index=feature_cols).sort_values(ascending=False)
wa.head(10)

### Permutation Importance

In [None]:
# Module-level generator used by perm_importance when no generator is passed.
rng = np.random.default_rng(0)


def perm_importance(X, y, model, n_repeats=3, generator=None):
    """Return the mean MSE increase caused by shuffling each column of X.

    For every column index j, the column is shuffled ``n_repeats`` times
    (each time starting from the original column), the model is re-evaluated,
    and the mean increase in MSE over the unshuffled baseline is recorded.

    Args:
        X: 2-D NumPy array accepted by ``torch.from_numpy``; it is not
            modified.
        y: Array of targets that broadcasts against the model output.
        model: Callable mapping a torch tensor to a torch tensor.
        n_repeats: Number of shuffles per column; must be at least 1.
        generator: Optional ``numpy.random.Generator``. When omitted, the
            module-level ``rng`` is used, so its state advances across calls.

    Returns:
        Dict mapping column index to the mean MSE increase as a float.

    Raises:
        ValueError: If ``n_repeats`` is smaller than 1.
    """
    if n_repeats < 1:
        raise ValueError("n_repeats must be at least 1")
    gen = rng if generator is None else generator

    def _mse(features):
        pred = model(torch.from_numpy(features)).detach().numpy()
        return ((pred - y) ** 2).mean()

    scores = {}
    # One working copy is reused for all columns and repeats instead of
    # copying the whole matrix for every single shuffle.
    work = X.copy()
    # Only forward passes are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        base = _mse(X)
        for j in range(X.shape[1]):
            deltas = []
            for _ in range(n_repeats):
                # Restart from the original column so every repeat shuffles
                # the same starting data.
                work[:, j] = X[:, j]
                gen.shuffle(work[:, j])
                deltas.append(_mse(work) - base)
            # Restore the column before moving on to the next feature.
            work[:, j] = X[:, j]
            scores[j] = float(np.mean(deltas))
    return scores

# Run the permutation study on the test data, label each score with its
# feature name, and show the ten largest MSE increases.
pi = (
    pd.Series(perm_importance(X_test, y_test, model))
    .rename(index=lambda j: feature_cols[j])
    .sort_values(ascending=False)
)
pi.head(10)

### Feature Fixing

In [None]:
def fix_importance(X, y, model):
    """Return the MSE increase caused by fixing each column of X to its mean.

    For every column index j, the column is replaced by its mean over the
    rows of X, the model is re-evaluated, and the increase in MSE over the
    unmodified baseline is recorded.

    Args:
        X: 2-D NumPy array accepted by ``torch.from_numpy``; it is not
            modified.
        y: Array of targets that broadcasts against the model output.
        model: Callable mapping a torch tensor to a torch tensor.

    Returns:
        Dict mapping column index to the MSE increase as a float.
    """

    def _mse(features):
        pred = model(torch.from_numpy(features)).detach().numpy()
        return ((pred - y) ** 2).mean()

    col_means = X.mean(axis=0)
    scores = {}
    # One working copy is reused for all columns instead of copying the
    # whole matrix once per feature.
    work = X.copy()
    # Only forward passes are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        base = _mse(X)
        for j in range(X.shape[1]):
            work[:, j] = col_means[j]
            scores[j] = float(_mse(work) - base)
            # Restore the column before moving on to the next feature.
            work[:, j] = X[:, j]
    return scores

# Run the feature-fixing study on the test data, label each score with its
# feature name, and show the ten largest MSE increases.
fi = (
    pd.Series(fix_importance(X_test, y_test, model))
    .rename(index=lambda j: feature_cols[j])
    .sort_values(ascending=False)
)
fi.head(10)