In [7]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# Baseline training script: fits an XGBoost regressor that predicts the
# 'UHI Index' column from every other column of the processed feature CSV,
# prints R² / MAE on a held-out split, and saves the fitted model with joblib.

# Configuration gathered in one place so paths and split settings are easy
# to change without hunting through the script.
FEATURES_CSV = "../data/processed/UHI_features.csv"
MODEL_PATH = "../models/UHI_xgboost.pkl"
TARGET_COLUMN = "UHI Index"
DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
TEST_SIZE = 0.3
RANDOM_STATE = 42

# 1. Load the final feature dataset
df = pd.read_csv(FEATURES_CSV)

# Print initial data types for debugging
print("Before datetime processing:")
print(df.dtypes)

# 2. Convert 'datetime' column to datetime objects and extract numeric features
if "datetime" in df.columns:
    timestamps = pd.to_datetime(
        df["datetime"], format=DATETIME_FORMAT, errors="coerce"
    )

    # errors="coerce" turns anything that does not match DATETIME_FORMAT into
    # NaT without raising. Report how many rows were affected so bad input is
    # visible instead of silently becoming NaN features.
    n_unparsed = int(timestamps.isna().sum())
    if n_unparsed:
        print(
            f"Warning: {n_unparsed} row(s) have a missing or unparseable "
            "'datetime'; their time features will be NaN."
        )

    # Numeric calendar features derived from the timestamp
    df["hour"] = timestamps.dt.hour
    df["day"] = timestamps.dt.day
    df["month"] = timestamps.dt.month
    df["weekday"] = timestamps.dt.weekday

    # Drop the original datetime column as it is non-numeric
    df = df.drop(columns=["datetime"])

# Print data types after processing
print("\nAfter datetime processing:")
print(df.dtypes)

# 3. Define features and target (every non-target column is a feature)
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# 4. Split data into training and testing sets (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# 5. Train a baseline XGBoost model
model = XGBRegressor(
    n_estimators=100, learning_rate=0.1, random_state=RANDOM_STATE
)
model.fit(X_train, y_train)

# 6. Evaluate the model on the held-out split
y_pred = model.predict(X_test)
print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.4f}")

# 7. Save the trained model
# Create the output directory first: joblib.dump does not create missing
# parent directories, and failing here would waste the training run.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, MODEL_PATH)
print(f"Model saved successfully to {MODEL_PATH}")


Before datetime processing:
Longitude    float64
Latitude     float64
datetime      object
UHI Index    float64
dtype: object

After datetime processing:
Longitude    float64
Latitude     float64
UHI Index    float64
hour           int32
day            int32
month          int32
weekday        int32
dtype: object
R² Score: 0.8773
MAE: 0.0044
Model saved successfully to ../models/UHI_xgboost.pkl
