In [1]:
# Third-party analysis stack.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation and convergence
# warnings from the libraries used below — consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

# Extend the import search path so the `src` package imported below can be
# resolved (presumably it lives under /workspace/GTFS — confirm).
import sys
sys.path.append('/workspace/GTFS')

# Project-local helpers used by the cells that follow.
from src.timeseries_processing import DataSplitter, DataStandardizer, DataSeparater
from src.model_training import DelayRegressionModel

# Show all columns (no truncation) when pandas displays a DataFrame.
pd.set_option('display.max_columns', None)

2025-09-20 10:49:29.374158: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-20 10:49:29.414395: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-20 10:49:30.289723: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-20 10:49:34.681164: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off,

In [2]:
# Load the merged dataset CSV into a DataFrame; the following cells pick the
# feature and target columns out of it.
# NOTE(review): absolute path — assumes the project is checked out at
# /workspace/GTFS.
delay_features = pd.read_csv(
    filepath_or_buffer='/workspace/GTFS/merged_dataset.csv',
)

In [3]:
# Column to predict, and the predictor columns handed to the separator.
# The names suggest weather readings plus sin/cos encodings of hour and day —
# TODO confirm against the code that builds merged_dataset.csv.
target_col = 'arrival_delay'
feature_cols = [
    'weather',
    'temp',
    'precipitation',
    'hour_sin',
    'hour_cos',
    'day_sin',
    'day_cos',
]

# Split the loaded frame into a feature matrix X, a target vector y and the
# list of features that were actually used.
data_separater = DataSeparater()
X_delay, y_delay, used_features = data_separater.X_Y_separate(
    delay_features,
    target_col,
    feature_cols,
)

# Report the resulting shapes (the printed label is Japanese for "sequence shape").
print(f"シーケンス形状: X={X_delay.shape}, y={y_delay.shape}")

Using features: ['weather', 'temp', 'precipitation', 'hour_sin', 'hour_cos', 'day_sin', 'day_cos']
シーケンス形状: X=(5739, 7), y=(5739,)


In [4]:
# Data split: 90% of the rows go to training, the remainder to test, using the
# project's temporal splitter (presumably order-preserving — confirm in
# DataSplitter.train_test_split_temporal).
data_splitter = DataSplitter()
(
    X_delay_train,
    X_delay_test,
    y_delay_train,
    y_delay_test,
) = data_splitter.train_test_split_temporal(X_delay, y_delay, train_ratio=0.9)

print("\n=== Delay Prediction Data Split Results ===")
print(f"  Training data: X={X_delay_train.shape}, y={y_delay_train.shape}")
print(f"  Test data: X={X_delay_test.shape}, y={y_delay_test.shape}")

# Data standardization. Going by the method names, the scalers are fitted on
# the training portion and then re-applied to the test portion.
standardizer = DataStandardizer()

# Features: fit + transform on train, transform only on test.
X_delay_train_scaled = standardizer.fit_transform_features(X_delay_train)
X_delay_test_scaled = standardizer.transform_features(X_delay_test)

# Targets: same pattern as the features.
y_delay_train_scaled = standardizer.fit_transform_targets(y_delay_train)
y_delay_test_scaled = standardizer.transform_targets(y_delay_test)


=== Delay Prediction Data Split Results ===
  Training data: X=(5165, 7), y=(5165,)
  Test data: X=(574, 7), y=(574,)


In [5]:
# Build the model (Ridge regression).
# NOTE(review): the arrays passed below are the *_scaled ones, yet
# normalize_features=True is also requested — confirm the model does not
# apply a second scaling step.
model = DelayRegressionModel(
    model_type='ridge',
    normalize_features=True,
    feature_names=feature_cols,
)

# Training.
# NOTE(review): the test split is handed to train_model together with
# validation_split=0.2, and the recorded training summary reports 5739 samples
# (5165 train + 574 test) — confirm the test rows are not used for fitting.
history = model.train_model(
    X_delay_train_scaled,
    X_delay_test_scaled,
    y_delay_train_scaled,
    y_delay_test_scaled,
    validation_split=0.2,
)

# Prediction on the scaled test features.
# NOTE(review): the model was trained on scaled targets, so these values are
# presumably in standardized units rather than raw delay — confirm.
predictions = model.predict(X_delay_test_scaled)

# Hyperparameter tuning on the scaled training data.
# NOTE(review): this runs after predict(); whether the estimator saved below is
# the tuned one depends on DelayRegressionModel — verify.
tuning_results = model.hyperparameter_tuning(
    X_delay_train_scaled,
    y_delay_train_scaled,
)

# Save the model (relative path, so it lands in the current working directory).
model.save_model('delay_model.pkl')


=== Training RIDGE Model ===
=== Building RIDGE Regression Model ===
Model type: ridge
Normalize features: True

Training ridge model...

Training Metrics:
  MSE: 0.9678
  RMSE: 0.9837
  MAE: 0.7822
  R2: 0.0322
  MAPE: 170.3469
  DIRECTION_ACCURACY: 0.5688

Validation Metrics:
  MSE: 0.8584
  RMSE: 0.9265
  MAE: 0.6887
  R2: -0.1383
  MAPE: 176.3107
  DIRECTION_ACCURACY: 0.5906

Performing 5-fold cross-validation...
CV RMSE: 1.0451 (+/- 0.2513)

=== Training Summary ===
Model: ridge
Features: 7
Samples: 5739
Cross-validation RMSE: 1.0451

Top 10 Important Features:
  temp: 0.2023
  hour_cos: 0.1596
  day_sin: 0.1527
  precipitation: 0.0738
  day_cos: 0.0659
  hour_sin: 0.0444
  weather: 0.0153

=== Hyperparameter Tuning for ridge ===
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Best parameters: {'alpha': 100.0}
Best CV score (RMSE): 1.0538
Model saved to delay_model.pkl
