In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
from pathlib import Path
# Read the CSV at ../data/sim1/smeared.csv into `df`. The path is relative,
# so it is resolved against the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer=Path("../data/sim1/smeared.csv"))

In [None]:
# Print the number of rows per `reaction_layer` value. This runs before the
# NaN removal below, so the counts describe the frame as loaded.
print(df.loc[:, 'reaction_layer'].value_counts())

# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')


In [None]:
# Feature columns: 'b_in', 'b_out', then dE_1 through dE_5.
features = ['b_in', 'b_out']
features.extend(f'dE_{i+1}' for i in range(5))

# Feature matrix and target column taken from the cleaned frame.
X = df.loc[:, features]
y = df.loc[:, 'reaction_layer']

# Stratified hold-out split with a fixed seed. No test_size is passed, so
# scikit-learn's default split fraction applies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)
print(f"Train size: {len(y_train)}")
print(f"Test size:  {len(y_test)}")

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees and a fixed seed, fitted on the training split.
# NOTE: the name `clf` is rebound to other models in later cells.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out split and print the per-class report.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Pin the matrix rows/columns to the same label array used for the tick labels.
# Without `labels=`, confusion_matrix only uses labels that occur in y_test or
# y_pred, which can be fewer than np.unique(y) and would misalign the axes.
class_labels = np.unique(y)

# Raw counts: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Row-normalise to percentages. A class with no test samples has a zero row
# total, so that row is left at 0 instead of dividing by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = 100 * np.divide(
    cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0
)

sns.heatmap(cm_normalized, annot=True, fmt=".1f", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Normalized Confusion Matrix (% per true class)")

plt.tight_layout()
plt.show()

In [None]:
# Print the importance of each feature from the currently fitted `clf`
# (the random forest when cells are run top to bottom), three decimals each,
# in the order given by `features`.
importances = clf.feature_importances_
for position in range(min(len(features), len(importances))):
    name, imp = features[position], importances[position]
    print(f"{name}: {imp:.3f}")

In [None]:
# Horizontal bar chart of the fitted model's feature importances,
# one bar per column of X.
importances = clf.feature_importances_
feat_names = X.columns

fig, ax = plt.subplots(figsize=(8, 5))
ax.barh(feat_names, importances)
ax.set_xlabel("Importance")
ax.set_title("Feature Importance (Random Forest)")
fig.tight_layout()
plt.show()

In [None]:
# LogisticRegression was used without being imported anywhere in the
# notebook, which raises NameError on a fresh kernel; import it here.
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training split only
# and then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic regression on the standardised features; max_iter is raised to 1000.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_scaled, y_train)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the standardised test split (the model was fitted on scaled data).
y_pred = clf.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

# Fix the class order explicitly and reuse it for the tick labels, so the axes
# show the real class labels (not positional indices 0..n-1) and always line
# up with the matrix rows/columns.
class_labels = np.unique(y)

# normalize='true' divides each row by its total: each row sums to 1.
cm = confusion_matrix(y_test, y_pred, labels=class_labels, normalize='true')
sns.heatmap(cm, annot=True, fmt=".2f", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title(f"{clf.__class__.__name__} - Normalized Confusion Matrix")
plt.tight_layout()
plt.show()

In [None]:
# Print the fitted coefficients per class, one line per feature.
# clf.coef_ has one row per class for multiclass problems, but only a single
# row (belonging to clf.classes_[1]) when there are two classes. Pair rows
# with labels accordingly so the binary case does not index past the array.
coef_rows = clf.coef_
if coef_rows.shape[0] == len(clf.classes_):
    coef_labels = clf.classes_
else:
    coef_labels = clf.classes_[1:]

for class_label, class_coefs in zip(coef_labels, coef_rows):
    print(f"\nClass {class_label}:")
    for feature, coef in zip(X.columns, class_coefs):
        print(f"  {feature}: {coef:.3f}")

In [None]:
import matplotlib.pyplot as plt

# Heatmap of the coefficient matrix: one row per coefficient vector, one
# column per feature. For a two-class problem clf.coef_ holds a single row
# (for clf.classes_[1]), so the y ticks are derived from the rows actually
# present rather than from len(clf.classes_).
coef_rows = clf.coef_
if coef_rows.shape[0] == len(clf.classes_):
    coef_labels = clf.classes_
else:
    coef_labels = clf.classes_[1:]

fig, ax = plt.subplots(figsize=(10, 6))
im = ax.imshow(coef_rows, cmap="coolwarm", aspect="auto")

ax.set_xticks(np.arange(len(X.columns)))
ax.set_xticklabels(X.columns, rotation=45)
ax.set_yticks(np.arange(len(coef_labels)))
ax.set_yticklabels([f"Class {c}" for c in coef_labels])

plt.colorbar(im, ax=ax, label="Coefficient value")
plt.title("Logistic Regression Coefficients per Class")
plt.tight_layout()
plt.show()

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier fitted on the raw (unscaled) training split.
# NOTE(review): the scaled arrays from the logistic-regression cell are not
# used here; if the feature columns have very different ranges, the distance
# computation may be dominated by the widest one. Confirm this is intended.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the held-out split and print the per-class report.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Fix the class order explicitly and reuse it for the tick labels, so the axes
# show the real class labels (not positional indices 0..n-1) and always line
# up with the matrix rows/columns.
class_labels = np.unique(y)

# normalize='true' divides each row by its total: each row sums to 1.
cm = confusion_matrix(y_test, y_pred, labels=class_labels, normalize='true')
sns.heatmap(cm, annot=True, fmt=".2f", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title(f"{clf.__class__.__name__} - Normalized Confusion Matrix")
plt.tight_layout()
plt.show()


In [None]:
from sklearn.inspection import permutation_importance

# Permutation importance of the current `clf` on the held-out split:
# 10 repeats per feature with a fixed seed.
result = permutation_importance(
    estimator=clf, X=X_test, y=y_test, random_state=42, n_repeats=10
)

# Print the mean importance over the repeats for each column of X.
for name, importance in zip(X.columns, result["importances_mean"]):
    print(f"{name}: {importance:.3f}")

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees with default hyperparameters, fitted on the training
# split. The seed is fixed (same value as the split and the random forest) so
# a Restart & Run All reproduces the same model.
clf = GradientBoostingClassifier(random_state=42)
clf.fit(X_train, y_train)


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the held-out split and print the per-class report.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Fix the class order explicitly and reuse it for the tick labels, so the axes
# show the real class labels (not positional indices 0..n-1) and always line
# up with the matrix rows/columns.
class_labels = np.unique(y)

# normalize='true' divides each row by its total: each row sums to 1.
cm = confusion_matrix(y_test, y_pred, labels=class_labels, normalize='true')
sns.heatmap(cm, annot=True, fmt=".2f", cmap="Blues",
            xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title(f"{clf.__class__.__name__} - Normalized Confusion Matrix")
plt.tight_layout()
plt.show()