# 03. Model Baseline
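Implements two naive baseline models — the last trip's delay and the mean delay of the past N trips — evaluates them on the test split, and saves the results for comparison with the models in the later notebooks.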

In [None]:
import pandas as pd
import utils

# Unpack the five values returned by the shared loader in utils.
loaded_splits = utils.load_split_data_with_combined()
df_train, df_valid, df_test, df_process, split_info = loaded_splits

# Stop early when df_process is missing; the following cells need it.
if df_process is None:
    raise RuntimeError("Data not found. Please run 02_process_data.ipynb first.")

In [None]:
# Build the model inputs with the shared helper, passing n_past_trips through.
n_past_trips = 5
data = utils.prepare_model_data(
    df_train,
    df_test,
    df_process,
    n_past_trips=n_past_trips,
)

# Pull the entries this notebook works with out of `data` in one unpacking.
X_delays_train, X_delays_test, y_test = (
    data[name] for name in ('X_delays_train', 'X_delays_test', 'y_test')
)

In [None]:
# Baseline 1: Last Trip Delay
# Starts the list that collects every evaluation result in this notebook.
evaluation_results = []

# Take the final entry along axis 1 of the test delay history
# (presumably the most recent past trip -- confirm against utils.prepare_model_data).
y_pred_baseline1 = X_delays_test[:, -1, :]

baseline1_config = dict(method="last_trip", n_past_trips=n_past_trips)
result_bl1 = utils.evaluate_model(
    y_test,
    y_pred_baseline1,
    model_name="Baseline 1 (Last Trip)",
    config=baseline1_config,
)
evaluation_results += [result_bl1]
print(result_bl1.summary())

In [None]:
# Baseline 2: Mean of Past N Trips
# Average the test delay history over axis 1
# (presumably the past-trip axis -- confirm against utils.prepare_model_data).
y_pred_baseline2 = X_delays_test.mean(axis=1)

baseline2_config = dict(method="mean_past_n", n_past_trips=n_past_trips)
result_bl2 = utils.evaluate_model(
    y_test,
    y_pred_baseline2,
    model_name="Baseline 2 (Mean Past N)",
    config=baseline2_config,
)
evaluation_results += [result_bl2]
print(result_bl2.summary())

In [None]:
# Model Comparison Table and Save Results
# Hand the collected baseline results and the target JSON path to the shared helper.
baseline_results_path = 'data/evaluation_results_baseline.json'
utils.display_and_save_results(evaluation_results, baseline_results_path)

# Cross-Model Comparison

全ノートブックの評価結果を読み込み、モデルを横断して比較します。
各ノートブック（03〜06）を実行後に以下のセルを実行してください。

In [None]:
# Load the evaluation results found under 'data' and show them side by side.
all_models_comparison = utils.load_all_evaluation_results('data')

# Only print the table when at least one result was loaded.
if not all_models_comparison.empty:
    banner = "=" * 80
    # One print call emits the blank line, the rule, the title and the rule again.
    print("\n" + banner, "ALL MODELS COMPARISON (Sorted by MAE)", banner, sep="\n")
    # Round to four decimals for display only; the loaded frame is not modified.
    display(all_models_comparison.round(4))