In [None]:
%load_ext autoreload
%autoreload 2

# 必要なライブラリのインポート
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

import warnings
warnings.filterwarnings('ignore')

# カスタムモジュールのインポート
import sys
sys.path.append('/workspace')

from src.timeseries_processing import SequenceCreator, DataSplitter, DataStandardizer
from src.model_training import DelayPredictionModel
from src.evaluation import ModelEvaluator, ModelVisualizer

# Show every column when a DataFrame is displayed, then load the merged dataset.
pd.set_option('display.max_columns', None)
delay_features = pd.read_csv('/workspace/data/merged_dataset.csv')

# Device setup: pick a distribution strategy that also works on CPU-only hosts.
print(f"TensorFlow version: {tf.__version__}")
try:
    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        # No GPU visible: keep whatever strategy TensorFlow currently reports.
        strategy = tf.distribute.get_strategy()
        print("✓ No GPU detected - running on CPU")
        print("  (For GPU support, use devcontainer-gpu.json)")
    else:
        # Request on-demand memory growth for each GPU; a RuntimeError here is
        # reported and skipped so the remaining devices are still configured.
        for gpu in gpus:
            try:
                tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as growth_error:
                print(f"Memory growth setup failed for {gpu.name}: {growth_error}")
        # Only the first GPU is used, regardless of how many were detected.
        strategy = tf.distribute.OneDeviceStrategy(device="/GPU:0")
        gpu_names = [detected.name for detected in gpus]
        print(f"✓ GPU detected: {gpu_names}")
        print("✓ Using GPU acceleration")
except Exception as setup_error:
    # Best-effort fallback: any failure during GPU setup is reported and the
    # current strategy is used instead of aborting the notebook.
    strategy = tf.distribute.get_strategy()
    print(f"✓ Running on CPU (GPU setup skipped: {setup_error})")

### 2.6-1 時系列データの作成

In [None]:
# Feature groups handed to SequenceCreator for the ConvLSTM input (customizable).
# Note: the former 'behavioral' group (is_peak_hour, is_weekend) is listed
# inside 'temporal' below, and 'arrival_delay' is both an input and the target.
temporal_features = [
    'hour_sin', 'hour_cos', 'day_sin', 'day_cos',
    'is_peak_hour', 'is_weekend', 'arrival_delay',
]
weather_features = [
    'weather_sunny', 'weather_cloudy', 'weather_rainy',
    'temp', 'precipitation',
]
feature_groups = {
    'temporal': temporal_features,
    'weather': weather_features,
    'target': ['arrival_delay'],
}

# Sequence builder from the custom module, configured with the groups above.
sequence_creator = SequenceCreator(
    input_timesteps=8,
    output_timesteps=3,
    feature_groups=feature_groups,
)

# Build the sequences with spatial organization enabled; per the original
# notes, feature and target columns are derived from feature_groups.
sequence_outputs = sequence_creator.create_route_direction_aware_sequences(
    delay_features,
    spatial_organization=True,
)
X_delay, y_delay, route_direction_info, used_features, feature_group_info = sequence_outputs

print(f"使用された特徴量: {used_features}")
print(f"シーケンス形状: X={X_delay.shape}, y={y_delay.shape}")

# Report each feature group with the index range and size it was given.
if feature_group_info:
    print("\n=== ConvLSTM Feature Groups (Width Dimension) ===")
    for group_name, info in feature_group_info.items():
        group_title = group_name.capitalize()
        group_members = info['features']
        width_slice = f"[{info['start_idx']}:{info['end_idx']}]"
        print(f"{group_title}: {group_members} -> width indices {width_slice}")
        print(f"  Size: {info['size']} features")

In [None]:
# Train/test split using the project's DataSplitter.
splitter = DataSplitter()

# Route-aware split; train_ratio=0.9 is passed through to the splitter.
split_outputs = splitter.train_test_split_by_route_direction(
    X_delay,
    y_delay,
    route_direction_info,
    train_ratio=0.9,
)
X_train, X_test, y_train, y_test, train_routes, test_routes = split_outputs

# Validate the split and print the splitter's own summary of the result.
validation_results = splitter.validate_split(
    X_train, X_test, y_train, y_test,
    route_direction_info, train_routes, test_routes,
)
splitter.print_split_summary(validation_results)

In [None]:
# Standardize the features only: fit on the training split, then apply the
# same fitted standardizer to the test split.
standardizer = DataStandardizer()
X_train_scaled = standardizer.fit_transform_features(X_train)
X_test_scaled = standardizer.transform_features(X_test)

# The target is used as-is (no standardization); print its basic statistics.
# NOTE(review): the "minutes" label is taken from the original output; the
# final summary cell divides MAE/RMSE by 60 before printing "minutes", so the
# actual unit of the target should be confirmed.
print("\n=== Target Data Analysis (No Standardization) ===")
print(f"  y_train shape: {y_train.shape}")
print(f"  y_test shape: {y_test.shape}")
print("  y_train statistics:")
for stat_label, stat_fn in (
    ("Min", y_train.min),
    ("Max", y_train.max),
    ("Mean", y_train.mean),
    ("Std", y_train.std),
):
    print(f"    {stat_label}: {stat_fn():.2f} minutes")

In [None]:
# Reshape both splits for the ConvLSTM: height 1, width = number of features
# (taken from axis 2 of the scaled training array).
actual_feature_count = X_train_scaled.shape[2]
convlstm_grid = {'target_height': 1, 'target_width': actual_feature_count}
X_train_reshaped = splitter.reshape_for_convlstm(X_train_scaled, **convlstm_grid)
X_test_reshaped = splitter.reshape_for_convlstm(X_test_scaled, **convlstm_grid)

print("\n=== ConvLSTM Reshape Results ===")
print(f"Training data: {X_train_reshaped.shape} (samples, timesteps, height, width, channels)")
print(f"Test data: {X_test_reshaped.shape}")

In [None]:
# Model construction (uses the project's DelayPredictionModel).
# NOTE(review): these repeat the 8 / 3 literals passed to SequenceCreator
# earlier in the notebook; the two places must be kept in sync.
INPUT_TIMESTEPS = 8
OUTPUT_TIMESTEPS = 3

print("=== Model Configuration Debug ===")
print("Data shapes:")
print(f"  X_train_scaled: {X_train_scaled.shape}")
# The target is not standardized, so it is labelled by its real name.
print(f"  y_train: {y_train.shape}")
print(f"  X_train_reshaped: {X_train_reshaped.shape}")

# Create the delay-prediction model wrapper.
model_trainer = DelayPredictionModel(
    input_timesteps=INPUT_TIMESTEPS,
    output_timesteps=OUTPUT_TIMESTEPS
)

# Per-sample input shape: (timesteps, height, width, channels).
actual_feature_count = X_train_scaled.shape[2]
input_shape = (INPUT_TIMESTEPS, 1, actual_feature_count, 1)

# Fail fast if the hand-written input shape disagrees with the reshaped
# training data that will be fed to the model in the training cell.
reshaped_sample_shape = tuple(X_train_reshaped.shape[1:])
if reshaped_sample_shape != input_shape:
    raise ValueError(
        f"Model input shape {input_shape} does not match reshaped training "
        f"data {reshaped_sample_shape}; check INPUT_TIMESTEPS and the "
        f"ConvLSTM reshape step."
    )

delay_model = model_trainer.build_model(input_shape)

print("\n=== Model Configuration ===")
print(f"Input shape: {input_shape}")
print(f"Feature count: {actual_feature_count}")
print(f"Expected output shape: (batch_size, {OUTPUT_TIMESTEPS})")
print("Model ready for training")

In [None]:
# Model training (uses the project's DelayPredictionModel wrapper).
print("=== Delay Prediction Model Training Started ===")

# Final shape check before training.
print("Training data shapes:")
print(f"  X_train_reshaped: {X_train_reshaped.shape}")
# The target is not standardized, so it is labelled by its real name.
print(f"  y_train: {y_train.shape}")

# Smoke test: run a forward pass on the first two samples so that a shape
# problem surfaces before the full training run starts.
print("\nTesting model with small sample...")
try:
    test_input = X_train_reshaped[:2]
    test_output = y_train[:2]
    test_pred = model_trainer.model.predict(test_input, verbose=0)
    print("Small sample test successful:")
    print(f"  Input shape: {test_input.shape}")
    print(f"  Expected output shape: {test_output.shape}")
    print(f"  Predicted output shape: {test_pred.shape}")
except Exception as e:
    print(f"Small sample test failed: {e}")
    # Bare raise re-raises the active exception with its original traceback.
    raise

# Run training; 20% of the training data is held out for validation and the
# model checkpoint path is passed through to the trainer.
history = model_trainer.train_model(
    X_train_reshaped,
    y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    model_path='best_delay_model.h5'
)

# train_model presumably returns None when training fails — confirm in
# src.model_training.
if history is not None:
    print("Training completed successfully!")
else:
    print("Training failed. Please check the error messages above.")

In [None]:
# Model evaluation and visualization (uses the project's custom modules).
print("=== Delay Prediction Model Evaluation ===")

# Predict on the reshaped test split.
predictions = model_trainer.predict(X_test_reshaped, batch_size=32)

print(f"Prediction result shape: {predictions.shape}")
print(f"Actual value shape: {y_test.shape}")

# Keep only the last column of multi-dimensional outputs (the final step of
# the predicted sequence); 1-D arrays are used unchanged.
if predictions.ndim > 1:
    delay_pred_final = predictions[:, -1]
else:
    delay_pred_final = predictions

if y_test.ndim > 1:
    y_test_final = y_test[:, -1]
else:
    y_test_final = y_test

# Overall metrics plus the per-delay-level breakdown.
evaluator = ModelEvaluator()
overall_metrics = evaluator.calculate_delay_metrics(y_test_final, delay_pred_final)
delay_level_analysis = evaluator.analyze_by_delay_level(y_test_final, delay_pred_final)
evaluator.print_evaluation_summary(overall_metrics, delay_level_analysis)

# Plot the prediction analysis.
visualizer = ModelVisualizer()
visualizer.plot_prediction_analysis(y_test_final, delay_pred_final, overall_metrics)

In [None]:
# Detailed analysis and visualization (uses the project's custom modules).
print("=== Route-wise and Time-period-wise Delay Prediction Analysis ===")

# Plot the per-delay-level analysis.
visualizer.plot_delay_level_analysis(y_test_final, delay_pred_final, delay_level_analysis)

# Plot the training history when one is available.
if history is None:
    print("訓練履歴が利用できません")
else:
    visualizer.plot_training_history(history)

# Persist the final model.
model_trainer.save_model('delay_prediction_model_final.h5')

# NOTE(review): MAE and RMSE are divided by 60 before being labelled
# "minutes", while the target statistics printed earlier label the raw values
# as "minutes". Confirm the unit of arrival_delay; one of the two is likely off.
print("\n=== Final Results Summary ===")
mae_scaled = overall_metrics['mae'] / 60
print(f"• Mean Absolute Error: {mae_scaled:.2f} minutes")
rmse_scaled = overall_metrics['rmse'] / 60
print(f"• RMSE: {rmse_scaled:.2f} minutes")
print(f"• R² Score: {overall_metrics['r2']:.3f}")
direction_pct = overall_metrics['direction_accuracy'] * 100
print(f"• Direction prediction accuracy: {direction_pct:.1f}%")
for horizon_label, range_key in (("1 minute", 'Within 1min'), ("2 minutes", 'Within 2min')):
    range_pct = overall_metrics['range_accuracies'][range_key] * 100
    print(f"• Within {horizon_label} accuracy: {range_pct:.1f}%")

print("\n=== モジュール化完了 ===")
print("全ての処理がカスタムモジュールを使用して実行されました。")