In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so warnings raised by pandas/numpy and
# by the project modules imported below are hidden as well — consider narrowing it.
import warnings
warnings.filterwarnings('ignore')

# Put the project root on the import path so that the `src` package imported
# below can be resolved.
# NOTE(review): absolute path — the notebook only runs where the repository
# lives at /workspace/GTFS.
import sys
sys.path.append('/workspace/GTFS')

# Project-local helpers used by the cells below: X/y separation, train/test
# splitting, standardization, and the delay regression model wrapper.
from src.timeseries_processing import DataSplitter, DataStandardizer, DataSeparater
from src.model_training import DelayRegressionModel

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [None]:
# Read merged_dataset.csv from the project's data directory into a DataFrame.
delay_features = pd.read_csv(
    filepath_or_buffer='/workspace/GTFS/data/merged_dataset.csv',
)

In [None]:
# Keep only the rows whose direction_id equals 0.
delay_features = delay_features[
    delay_features['direction_id'].eq(0)
]

In [None]:
# Column configuration: the value to predict and the input columns
# (weather flags, humidex, and sine/cosine hour and day columns).
target_col = 'arrival_delay'
feature_cols = [
    'weather_sunny', 'weather_cloudy', 'weather_rainy',
    'humidex',
    'hour_sin', 'hour_cos',
    'day_sin', 'day_cos',
]

# Separate the filtered frame into inputs (X) and target (y); the helper also
# returns the list of features it used.
data_separater = DataSeparater()
X_delay, y_delay, used_features = data_separater.X_Y_separate(
    delay_features, target_col, feature_cols
)

# Report the shapes of X and y (the label text is Japanese for "sequence shape").
print(f"シーケンス形状: X={X_delay.shape}, y={y_delay.shape}")

In [None]:
# Data split: use the splitter's temporal train/test split with train_ratio=0.9.
data_splitter = DataSplitter()
(
    X_delay_train,
    X_delay_test,
    y_delay_train,
    y_delay_test,
) = data_splitter.train_test_split_temporal(X_delay, y_delay, train_ratio=0.9)

# Summarize the shapes of the resulting partitions.
print(f"\n=== Delay Prediction Data Split Results ===")
print(f"  Training data: X={X_delay_train.shape}, y={y_delay_train.shape}")
print(f"  Test data: X={X_delay_test.shape}, y={y_delay_test.shape}")

# Data standardization: fit_transform_* is called on the training split and
# transform_* on the test split (presumably so the test data is scaled with the
# training statistics — confirm in DataStandardizer).
standardizer = DataStandardizer()

# Features.
X_delay_train_scaled = standardizer.fit_transform_features(X_delay_train)
X_delay_test_scaled = standardizer.transform_features(X_delay_test)

# Targets.
y_delay_train_scaled = standardizer.fit_transform_targets(y_delay_train)
y_delay_test_scaled = standardizer.transform_targets(y_delay_test)

In [None]:
# Model creation (Ridge regression).
# NOTE(review): the inputs were already standardized by DataStandardizer, and
# normalize_features=True is set as well — confirm the data is not scaled twice.
model = DelayRegressionModel(
    model_type='ridge',
    normalize_features=True,
    feature_names=feature_cols,
)

# Training.
# NOTE(review): the test split is passed in together with validation_split=0.2 —
# check how train_model uses each of them.
history = model.train_model(
    X_delay_train_scaled,
    X_delay_test_scaled,
    y_delay_train_scaled,
    y_delay_test_scaled,
    validation_split=0.2,
)

# Prediction on the scaled test features.
predictions = model.predict(X_delay_test_scaled)

# Hyperparameter tuning on the scaled training data.
# NOTE(review): this runs after `predictions` was computed; if tuning refits the
# model, the predictions above and the model saved below may not correspond — confirm.
tuning_results = model.hyperparameter_tuning(
    X_delay_train_scaled,
    y_delay_train_scaled,
)

# Save the model under the relative path 'delay_model.pkl'.
model.save_model('delay_model.pkl')