In [None]:
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Load the data; rows with any missing value are dropped.
df = pd.read_csv("data/CA_Weather_Fire_Dataset_1984-2025.csv")
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.dropna()
df['FIRE_START_DAY'] = df['FIRE_START_DAY'].astype(int)

# One-hot encode the season (first level dropped as the reference category).
df = pd.get_dummies(df, columns=['SEASON'], drop_first=True)

# Calendar features derived from the date.
df['WEEKDAY'] = df['DATE'].dt.weekday
df['IS_WEEKEND'] = df['WEEKDAY'].isin([5, 6]).astype(int)

# Features and label. Everything is cast to float so that the boolean dummy
# columns and the scaled columns stack into one homogeneous numeric array.
X = df.drop(columns=['FIRE_START_DAY', 'DATE']).astype(float)
y = df['FIRE_START_DAY']

# Split FIRST, on the real (non-resampled) data. Fitting the scaler or SMOTE
# before this point would leak test-set information into training, and the
# test set would contain synthetic rows, inflating every reported metric.
# NOTE(review): rows are dated and lagged features are present, so a
# chronological split may be more appropriate than a random one - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Columns to standardize. Scaling does not matter to the trees, but SMOTE
# interpolates using nearest-neighbour distances, which are scale-sensitive.
features_to_scale = [
    'PRECIPITATION', 'MAX_TEMP', 'MIN_TEMP', 'AVG_WIND_SPEED',
    'TEMP_RANGE', 'WIND_TEMP_RATIO', 'LAGGED_PRECIPITATION',
    'LAGGED_AVG_WIND_SPEED'
]
scaler = StandardScaler()
preprocess = ColumnTransformer(
    [('num', scaler, features_to_scale)],
    remainder='passthrough',  # keep dummies and calendar features unchanged
)

# SMOTE class balancing. Inside an imblearn Pipeline the sampler runs only
# during fit, so validation folds and the test set are never resampled.
smote = SMOTE(random_state=42)

# XGBoost and hyper-parameter search.
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model = Pipeline(steps=[
    ('scale', preprocess),
    ('smote', smote),
    ('xgb', xgb),
])
param_grid = {
    'xgb__n_estimators': [100, 200],
    'xgb__max_depth': [4, 6, 8],
    'xgb__learning_rate': [0.01, 0.1],
    'xgb__subsample': [0.8, 1.0]
}
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
# Validation folds now keep the true class ratio, so plain accuracy would
# reward predicting the majority class; balanced accuracy is the equivalent
# of the accuracy previously measured on SMOTE-balanced folds.
grid = GridSearchCV(model, param_grid, scoring='balanced_accuracy', cv=cv, n_jobs=-1)
grid.fit(X_train, y_train)

# Model evaluation on the untouched, real-distribution test set.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
# Strip the pipeline step prefix so the printed names are the XGBoost ones.
best_params = {name.split('__', 1)[1]: value for name, value in grid.best_params_.items()}
print("Best Parameters:", best_params)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
