# In [None]:
# 3_Model_Training.ipynb
# ------------------------------
# PURPOSE: Train and evaluate an ML model, then save it

from pathlib import Path

import joblib
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# Pipeline: load the engineered feature table, hold out a test split, fit a
# baseline XGBoost regressor, report hold-out metrics, then persist the model
# and the hold-out predictions.
# All paths below are relative to the current working directory.
FEATURES_CSV = "../data/processed/UHI_features.csv"
MODEL_PATH = "../models/UHI_xgboost.pkl"
SUBMISSION_CSV = "../Submission_template.csv"
TARGET_COLUMN = "UHI_Index"
TEST_FRACTION = 0.3
RANDOM_SEED = 42  # shared by the split and the model so runs are repeatable

# 1. Load final feature dataset
df = pd.read_csv(FEATURES_CSV)

# 2. Define features and target
# Every column other than the target is passed to the model as a feature.
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# 3. Split into train/test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=RANDOM_SEED
)

# 4. Train a baseline XGBoost model
model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=RANDOM_SEED,
)
model.fit(X_train, y_train)

# 5. Evaluate on the held-out rows only
y_pred = model.predict(X_test)
print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.4f}")

# 6. Save the model
# Create the output directory first: without it joblib.dump raises
# FileNotFoundError and the freshly trained model would be lost.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): a joblib/pickle dump is tied to the library versions that
# produced it; confirm the loading environment matches this one.
joblib.dump(model, MODEL_PATH)
print(f"Model training complete! Model saved to '{MODEL_PATH}'.")

# 7. (Optional) Create a submission file if you have a submission template
# "id" holds the index labels of the test rows, aligned with y_pred.
submission = pd.DataFrame({"id": X_test.index, "predicted_UHI_Index": y_pred})
# NOTE(review): this replaces any existing file at SUBMISSION_CSV, which is
# named like the template itself — confirm the template need not be preserved.
submission.to_csv(SUBMISSION_CSV, index=False)
print(f"Submission file created: '{SUBMISSION_CSV}'")
