In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
import yaml
# Put the directory one level above the working directory on sys.path so the
# project's `scripts` package can be imported from this notebook.
project_root = Path.cwd().parents[0]
sys.path.append(str(project_root))
from scripts.paths import PROJECT_ROOT, load_config

# Parse the sim1 YAML file and keep its "simulation" section close at hand.
with Path("../configs/sim1.yaml").open('r') as cfg_file:
    config = yaml.safe_load(cfg_file)
sim_cfg = config["simulation"]

# Bare trailing expression: rendered as this cell's output.
sim_cfg


In [None]:

# Read the event table, then discard every row that is missing a value in
# any column whose name begins with "dE_".
df = pd.read_csv(Path("../data/sim1/smeared.csv"))  # adjust path as needed
de_mask = df.columns.str.startswith("dE_")
df = df.dropna(subset=list(df.columns[de_mask]))

# Classification label: 1 when reaction_layer is positive, otherwise 0.
df["reaction_occurred"] = df["reaction_layer"].gt(0).astype(int)

# Relative frequency of each label, to see how imbalanced the classes are.
print(df["reaction_occurred"].value_counts(normalize=True))


In [None]:
from sklearn.model_selection import train_test_split

# Model inputs: the two b_* columns followed by dE_1 ... dE_5.
features = ['b_in', 'b_out']
features.extend(f'dE_{i+1}' for i in range(5))

X, y = df[features], df["reaction_occurred"]

# Hold out 25% of the rows for testing; stratifying on y keeps the label
# proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y, test_size=0.25
)

train_balance = y_train.value_counts(normalize=True).round(3).to_dict()
print("Class balance in train:", train_balance)


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so nothing from the test rows leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic regression with balanced class weights, fitted on the scaled
# training data and used to label the held-out rows.
clf = LogisticRegression(max_iter=1000, class_weight='balanced').fit(
    X_train_scaled, y_train
)
y_pred = clf.predict(X_test_scaled)

# Per-class metrics on the held-out split, three decimals.
report = classification_report(y_test, y_pred, digits=3)
print(report)

In [None]:
# Independent copy of the rows in which a reaction occurred; columns added to
# `df` after this point do NOT appear on `reacted_df`, and vice versa.
reacted_df = df[df["reaction_occurred"] == 1].copy()

# Layer thicknesses from the simulation config. The value is sliced and
# summed below, so it is expected to be a sequence with one entry per layer.
thicknesses = sim_cfg["layer_thickness"]


def compute_total_depth(row):
    """Return the reaction depth accumulated over all layers for one event.

    The result is the sum of the thicknesses of every layer in front of the
    reaction layer plus the row's ``reaction_depth`` value.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``"reaction_layer"`` (1-based layer number, 0 meaning
        no reaction) and ``"reaction_depth"``.

    Returns
    -------
    float
        The accumulated depth, or ``np.nan`` when ``reaction_layer`` is 0.
    """
    if row["reaction_layer"] == 0:
        return np.nan
    # reaction_layer is 1-based and may be stored as a float; turn it into
    # the count of complete layers that precede the reaction layer.
    layer_idx = int(row["reaction_layer"]) - 1
    return sum(thicknesses[:layer_idx]) + row["reaction_depth"]


# The target must live on `reacted_df` itself: the previous version assigned
# it to `df` only, so the `reacted_df["reaction_total_depth"]` lookup below
# raised KeyError on a fresh kernel.
reacted_df["reaction_total_depth"] = reacted_df.apply(compute_total_depth, axis=1)

# Keep the column on `df` as before (index-aligned assignment; rows without a
# reaction receive NaN).
df["reaction_total_depth"] = reacted_df["reaction_total_depth"]

X_react = reacted_df[features]
y_react = reacted_df["reaction_total_depth"]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out a quarter of the reacted events for evaluating the regressor.
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    X_react, y_react, random_state=42, test_size=0.25
)

# Scaling statistics come from the training part only and are then applied
# unchanged to both parts.
scaler_r = StandardScaler().fit(Xr_train)
Xr_train_scaled, Xr_test_scaled = (
    scaler_r.transform(part) for part in (Xr_train, Xr_test)
)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Random forest with 100 trees and a fixed seed, trained on the scaled
# training features; predictions are made for the held-out events.
reg = RandomForestRegressor(random_state=42, n_estimators=100).fit(
    Xr_train_scaled, yr_train
)
yr_pred = reg.predict(Xr_test_scaled)

# Error metrics on the held-out events: mean absolute error and the square
# root of the mean squared error.
mse = mean_squared_error(yr_test, yr_pred)
mae = mean_absolute_error(yr_test, yr_pred)
rmse = np.sqrt(mse)

print(f"MAE:  {mae:.4f}")
print(f"RMSE: {rmse:.4f}")



In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Reaction layer of each held-out event, aligned with yr_test by index label;
# used only to colour the points.
layer_colors = reacted_df.loc[yr_test.index, "reaction_layer"]

# Discrete colour scale with one bin per integer step from the smallest to
# the largest observed layer. The number of colours is taken from the number
# of bins (not the number of distinct layers) so that a gap in the observed
# layers no longer makes BoundaryNorm raise; with contiguous layers the two
# counts are equal.
unique_layers = sorted(layer_colors.unique())
boundaries = np.arange(min(unique_layers) - 0.5, max(unique_layers) + 1.5)
n_bins = len(boundaries) - 1
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9.
cmap = plt.get_cmap('viridis', n_bins)
norm = mcolors.BoundaryNorm(boundaries=boundaries, ncolors=n_bins)

fig, ax = plt.subplots(figsize=(7, 6))
scatter = ax.scatter(yr_test, yr_pred, c=layer_colors, cmap=cmap, norm=norm, alpha=0.6, s=10)
# Dashed diagonal: where points would fall if prediction equalled truth.
depth_range = [yr_test.min(), yr_test.max()]
ax.plot(depth_range, depth_range, 'r--')
ax.set_xlabel("True Reaction Depth [mm]")
ax.set_ylabel("Predicted Depth [mm]")
ax.set_title("Reaction Depth Prediction Colored by Reaction Layer")
cbar = fig.colorbar(scatter, ax=ax, ticks=unique_layers)
cbar.set_label("Reaction Layer")
fig.tight_layout()
plt.show()

In [None]:
# Signed prediction error per held-out event, drawn with the colour scale
# (layer_colors / cmap / norm / unique_layers) set up for the previous figure.
residuals = yr_pred - yr_test

fig, ax = plt.subplots(figsize=(8, 5))
scatter = ax.scatter(
    yr_test, residuals,
    c=layer_colors, cmap=cmap, norm=norm,
    alpha=0.6, s=10,
)
# Zero line: points above it are over-predictions, points below under-predictions.
ax.axhline(0, color='red', linestyle='--')
ax.set_xlabel("True Reaction Depth [mm]")
ax.set_ylabel("Residual (Predicted - True) [mm]")
ax.set_title("Residuals Colored by Reaction Layer")
cbar = fig.colorbar(scatter, ax=ax, ticks=unique_layers)
cbar.set_label("Reaction Layer")
ax.grid(True)
fig.tight_layout()
plt.show()