In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from metr.components.metr_imc.traffic_data import TrafficData
from metr.utils import PathConfig
from tqdm import tqdm

In [2]:
PATH_CONF = PathConfig.from_yaml("../config.yaml")
FILTERED_PATH_CONF = PathConfig.from_yaml("../config_brits.yaml")

# 체크포인트 경로 설정 (LSTM 모델)
CHECKPOINT_PATH = "../traffic_imc/output/lstm/BRITS/model.ckpt"
if not os.path.exists(CHECKPOINT_PATH):
    raise FileNotFoundError(f"Checkpoint path {CHECKPOINT_PATH} does not exist.")

TRAFFIC_DATA_PATH = PATH_CONF.metr_imc_path
NODELINK_SHAPE_PATH = PATH_CONF.nodelink_link_path
ADJ_MX_PATH = PATH_CONF.adj_mx_path
METADATA_PATH = PATH_CONF.metadata_path

FILTERED_DATA_PATH = FILTERED_PATH_CONF.metr_imc_path
FILTERED_NODELINK_SHAPE_PATH = FILTERED_PATH_CONF.metr_shapefile_path
FILTERED_ADJ_MX_PATH = FILTERED_PATH_CONF.adj_mx_path
FILTERED_METADATA_PATH = FILTERED_PATH_CONF.metadata_path

In [3]:
# Set up visualization parameters
plt.rcParams["font.family"] = "AppleGothic"  # Use AppleGothic for better font rendering
plt.rcParams["axes.unicode_minus"] = False  # Prevent negative sign rendering issues

In [4]:
raw = TrafficData.import_from_hdf(TRAFFIC_DATA_PATH)
df = raw.data
df.iloc[:, :5]

Unnamed: 0,1630020000,1630168900,1640010500,1640021100,1680010502
2023-01-01 00:00:00,,,,,
2023-01-01 01:00:00,,,,,
2023-01-01 02:00:00,,,,,
2023-01-01 03:00:00,,,,,
2023-01-01 04:00:00,,,,,
...,...,...,...,...,...
2026-01-15 19:00:00,264.0,9.0,0.0,0.0,
2026-01-15 20:00:00,175.0,5.0,0.0,0.0,
2026-01-15 21:00:00,129.0,3.0,0.0,0.0,
2026-01-15 22:00:00,85.0,3.0,0.0,0.0,


In [5]:
imputed_raw = TrafficData.import_from_hdf(FILTERED_DATA_PATH)
imputed_df = imputed_raw.data
imputed_df.iloc[:, :5].head()

Unnamed: 0,1630020000,1630168900,1640010500,1640021100,1680055400
2023-01-26 00:00:00,0.0,0.0,204.0,0.0,10.779705
2023-01-26 01:00:00,0.0,0.0,137.0,0.0,1.620499
2023-01-26 02:00:00,0.0,0.0,83.0,0.0,0.326149
2023-01-26 03:00:00,0.0,0.0,57.0,0.0,2.690292
2023-01-26 04:00:00,0.0,0.0,136.0,0.0,-0.243927


In [6]:
metadata_raw = pd.read_hdf(FILTERED_METADATA_PATH)
metadata_raw

Unnamed: 0,LINK_ID,ROAD_NAME,LANES,ROAD_RANK,ROAD_TYPE,MAX_SPD,REST_VEH
15,1630020000,한나루로,2,104,000,50,0
17,1630168900,석정로,2,104,000,50,0
18,1640010500,경원대로,5,104,000,60,0
21,1640021100,먼우금로,3,104,000,30,0
50,1680055400,봉수대로,3,104,000,60,0
...,...,...,...,...,...,...,...
15935,1650278500,남동대로,4,104,000,50,0
15936,1650278600,남동대로,4,104,000,50,0
15972,1680257100,중봉대로,2,104,000,60,0
15973,1680257900,중봉대로,2,104,000,60,0


In [7]:
metadata_raw["ROAD_RANK"].unique()

<ArrowStringArray>
['104', '107', '101', '103', '105', '106']
Length: 6, dtype: str

In [8]:
highways_info = metadata_raw[metadata_raw['ROAD_RANK'] == '101']
highways_list = highways_info['LINK_ID']
highways_list

116      1630167000
2221     1630174501
2223     1630174601
2224     1630174701
2225     1630174801
2226     1630174901
2227     1630175001
2228     1630175101
2229     1630175201
2230     1630175301
2231     1630175401
3719     1610079200
3720     1610079300
4546     1610080400
4547     1610080600
7213     1640054700
8246     1640316500
8278     1640320200
8279     1640320300
8281     1650004100
8282     1640320400
8298     1650003901
8310     1650004200
8311     1650004300
8312     1650004400
8313     1650004500
8315     1650004600
8316     1650004900
8317     1650005000
9241     1650314500
9242     1650314900
9489     1650350800
9490     1650350900
9524     1660003701
9525     1660003702
9526     1660003801
9528     1660003802
9529     1660003900
9530     1660004000
10140    1663132600
10331    1670003500
10334    1670002500
10339    1670002800
10344    1670003400
10353    1670004000
10363    1670004100
10997    1670222800
10999    1670222900
11055    1680008501
11056    1680008503


In [9]:
normal_road_info = metadata_raw[metadata_raw['ROAD_RANK'] != '101']
normal_road_list = normal_road_info['LINK_ID']
normal_road_list

15       1630020000
17       1630168900
18       1640010500
21       1640021100
50       1680055400
            ...    
15935    1650278500
15936    1650278600
15972    1680257100
15973    1680257900
15974    1680258100
Name: LINK_ID, Length: 1960, dtype: str

## LSTM 모델 고속도로 센서 평가

학습된 LSTM 모델을 불러와서 고속도로(ROAD_RANK='101') 센서에 대해서만 MAE, RMSE, MAPE를 계산합니다.

In [10]:
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error

from metr.datasets.rnn.datamodule import MultiSensorTrafficDataModule
from metr_val.models.rnn.module import MultiSensorLSTMLightningModule

  from pkg_resources import DistributionNotFound, get_distribution
  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# 디바이스 설정
device = torch.device("mps") if torch.backends.mps.is_available() else \
         torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: mps


In [12]:
# 고속도로 및 일반도로 센서 ID 리스트 (str 타입으로 변환)
highway_sensor_ids = [str(sid) for sid in highways_list]
normal_road_sensor_ids = [str(sid) for sid in normal_road_list]

print(f"Highway sensors count: {len(highway_sensor_ids)}")
print(f"Normal road sensors count: {len(normal_road_sensor_ids)}")

Highway sensors count: 55
Normal road sensors count: 1960


In [13]:
# 데이터 모듈 설정 (LSTM용 - 전체 센서)
data_module = MultiSensorTrafficDataModule(
    training_dataset_path=FILTERED_PATH_CONF.metr_imc_training_path,
    test_dataset_path=FILTERED_PATH_CONF.metr_imc_test_path,
    test_missing_path=FILTERED_PATH_CONF.metr_imc_test_missing_path,
    train_val_split=0.8,
    seq_length=24,
    batch_size=512,
    num_workers=0,  # Notebook에서는 0 권장
    shuffle_training=False,
    target_sensors=None,  # 전체 센서 사용
    scale_method="normal",
)

# 데이터 모듈 설정
data_module.setup()
scaler = data_module.scaler

print(f"Train dataset size: {len(data_module.train_dataset)}")
print(f"Test dataset size: {len(data_module.test_dataset)}")

Train dataset size: 40187160
Test dataset size: 2176200


In [14]:
# 체크포인트에서 LSTM 모델 로드 (CPU로 먼저 로드)
model = MultiSensorLSTMLightningModule.load_from_checkpoint(
    CHECKPOINT_PATH,
    scaler=scaler,
    map_location="cpu",
)

# 모델 전체를 디바이스로 이동
model.to(device)
model.eval()
print(f"Model Path: {CHECKPOINT_PATH}")
print(f"Model loaded on device: {device}")

Model Path: ../traffic_imc/output/lstm/BRITS/model.ckpt
Model loaded on device: mps


In [15]:
# 테스트 데이터에서 예측 수행
test_loader = data_module.test_dataloader()

all_y_true = []
all_y_pred = []
all_is_missing = []
all_sensor_names = []

with torch.no_grad():
    for batch in tqdm(test_loader):
        # LSTM 배치 형태: (x, y, x_time_indices, y_time_indices, sensor_names, y_is_missing_list)
        x, y, x_time_indices, y_time_indices, sensor_names, y_is_missing_list = batch
        x = x.to(device)
        
        # 모델 예측
        y_hat = model(x)
        
        # Reshape y if needed
        if y.dim() > 2:
            y = y.squeeze(-1)
        
        all_y_true.append(y.cpu().numpy())
        all_y_pred.append(y_hat.cpu().numpy())
        all_is_missing.extend(y_is_missing_list)  # list of bool
        all_sensor_names.extend(sensor_names)

# 배열 연결
y_true = np.concatenate(all_y_true, axis=0)  # (total_samples,)
y_pred = np.concatenate(all_y_pred, axis=0)
is_missing = np.array(all_is_missing)  # (total_samples,)

print(f"Predictions shape: {y_pred.shape}")
print(f"Ground truth shape: {y_true.shape}")
print(f"Is missing shape: {is_missing.shape}")
print(f"Total test samples: {len(all_sensor_names)}")

100%|██████████| 4251/4251 [03:41<00:00, 19.20it/s]


Predictions shape: (2176200, 1)
Ground truth shape: (2176200, 1)
Is missing shape: (2176200,)
Total test samples: 2176200


In [16]:
# 전체 센서 통계
print(f"All sensors predictions shape: {y_pred.shape}")

# Flatten (LSTM은 이미 1D)
y_true_flat = y_true.flatten()
y_pred_flat = y_pred.flatten()
is_missing_flat = is_missing.flatten()
sensor_names_array = np.array(all_sensor_names)

# Valid mask (보간되지 않은 원본 데이터만)
valid_mask_all = ~is_missing_flat

# 고속도로 센서 마스크 생성
highway_sensor_set = set(highway_sensor_ids)
is_highway = np.array([sn in highway_sensor_set for sn in all_sensor_names])

# 일반도로 센서 마스크 생성
normal_road_sensor_set = set(normal_road_sensor_ids)
is_normal_road = np.array([sn in normal_road_sensor_set for sn in all_sensor_names])

# 고속도로 & valid 마스크
valid_mask_highway = valid_mask_all & is_highway

# 일반도로 & valid 마스크
valid_mask_normal = valid_mask_all & is_normal_road

print(f"\n=== All Sensors Statistics ===")
print(f"Total points: {len(y_true_flat)}")
print(f"Valid (original) points: {valid_mask_all.sum()} ({valid_mask_all.sum()/len(y_true_flat)*100:.1f}%)")

print(f"\n=== Highway Sensor Statistics ===")
print(f"Highway points: {is_highway.sum()}")
print(f"Valid highway points: {valid_mask_highway.sum()} ({valid_mask_highway.sum()/is_highway.sum()*100:.1f}%)")

print(f"\n=== Normal Road Sensor Statistics ===")
print(f"Normal road points: {is_normal_road.sum()}")
print(f"Valid normal road points: {valid_mask_normal.sum()} ({valid_mask_normal.sum()/is_normal_road.sum()*100:.1f}%)")

All sensors predictions shape: (2176200, 1)

=== All Sensors Statistics ===
Total points: 2176200
Valid (original) points: 1922917 (88.4%)

=== Highway Sensor Statistics ===
Highway points: 59400
Valid highway points: 37427 (63.0%)

=== Normal Road Sensor Statistics ===
Normal road points: 2116800
Valid normal road points: 1885490 (89.1%)


In [17]:
# Scaled 메트릭 계산 (전체 센서 & 고속도로 센서 & 일반도로 센서)

# sMAPE 계산 함수
def smape(y_true, y_pred):
    """Symmetric Mean Absolute Percentage Error (sMAPE)"""
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    nonzero_denom = denominator != 0
    if not nonzero_denom.any():
        return 0.0
    return np.mean(np.abs(y_true[nonzero_denom] - y_pred[nonzero_denom]) / denominator[nonzero_denom]) * 100

# 전체 센서 - valid 데이터만
y_true_all_valid = y_true_flat[valid_mask_all]
y_pred_all_valid = y_pred_flat[valid_mask_all]

# 고속도로 센서 - valid 데이터만
y_true_highway_valid = y_true_flat[valid_mask_highway]
y_pred_highway_valid = y_pred_flat[valid_mask_highway]

# 일반도로 센서 - valid 데이터만
y_true_normal_valid = y_true_flat[valid_mask_normal]
y_pred_normal_valid = y_pred_flat[valid_mask_normal]

# 전체 센서 메트릭 (Scaled)
mae_all_scaled = mean_absolute_error(y_true_all_valid, y_pred_all_valid)
rmse_all_scaled = np.sqrt(mean_squared_error(y_true_all_valid, y_pred_all_valid))
nonzero_all = y_true_all_valid != 0
mape_all_scaled = np.mean(np.abs(
    (y_true_all_valid[nonzero_all] - y_pred_all_valid[nonzero_all]) / y_true_all_valid[nonzero_all]
)) * 100 if nonzero_all.any() else 0.0
smape_all_scaled = smape(y_true_all_valid, y_pred_all_valid)

# 고속도로 센서 메트릭 (Scaled)
mae_highway_scaled = mean_absolute_error(y_true_highway_valid, y_pred_highway_valid)
rmse_highway_scaled = np.sqrt(mean_squared_error(y_true_highway_valid, y_pred_highway_valid))
nonzero_highway = y_true_highway_valid != 0
mape_highway_scaled = np.mean(np.abs(
    (y_true_highway_valid[nonzero_highway] - y_pred_highway_valid[nonzero_highway]) / y_true_highway_valid[nonzero_highway]
)) * 100 if nonzero_highway.any() else 0.0
smape_highway_scaled = smape(y_true_highway_valid, y_pred_highway_valid)

# 일반도로 센서 메트릭 (Scaled)
mae_normal_scaled = mean_absolute_error(y_true_normal_valid, y_pred_normal_valid)
rmse_normal_scaled = np.sqrt(mean_squared_error(y_true_normal_valid, y_pred_normal_valid))
nonzero_normal = y_true_normal_valid != 0
mape_normal_scaled = np.mean(np.abs(
    (y_true_normal_valid[nonzero_normal] - y_pred_normal_valid[nonzero_normal]) / y_true_normal_valid[nonzero_normal]
)) * 100 if nonzero_normal.any() else 0.0
smape_normal_scaled = smape(y_true_normal_valid, y_pred_normal_valid)

print(f"\n=== Scaled Metrics (Excluding Interpolated) ===")
print(f"{'Metric':<10} {'All Sensors':>15} {'Highway':>15} {'Normal Road':>15}")
print("-" * 60)
print(f"{'MAE':<10} {mae_all_scaled:>15.4f} {mae_highway_scaled:>15.4f} {mae_normal_scaled:>15.4f}")
print(f"{'RMSE':<10} {rmse_all_scaled:>15.4f} {rmse_highway_scaled:>15.4f} {rmse_normal_scaled:>15.4f}")
print(f"{'MAPE (%)':<10} {mape_all_scaled:>15.2f} {mape_highway_scaled:>15.2f} {mape_normal_scaled:>15.2f}")
print(f"{'sMAPE (%)':<10} {smape_all_scaled:>15.2f} {smape_highway_scaled:>15.2f} {smape_normal_scaled:>15.2f}")


=== Scaled Metrics (Excluding Interpolated) ===
Metric         All Sensors         Highway     Normal Road
------------------------------------------------------------
MAE                 0.0026          0.0108          0.0024
RMSE                0.0054          0.0163          0.0050
MAPE (%)              0.61            2.37            0.58
sMAPE (%)             0.61            2.36            0.58


In [18]:
# Unscaled (원본 스케일) 메트릭 계산
def inverse_transform(data, scaler):
    """Inverse transform scaled data back to original scale."""
    original_shape = data.shape
    flat_data = data.reshape(-1, 1)
    unscaled = scaler.inverse_transform(flat_data)
    return unscaled.reshape(original_shape)

# 전체 센서 - 원본 스케일로 변환
y_true_all_unscaled = inverse_transform(y_true_all_valid, scaler)
y_pred_all_unscaled = inverse_transform(y_pred_all_valid, scaler)

# 고속도로 센서 - 원본 스케일로 변환
y_true_highway_unscaled = inverse_transform(y_true_highway_valid, scaler)
y_pred_highway_unscaled = inverse_transform(y_pred_highway_valid, scaler)

# 일반도로 센서 - 원본 스케일로 변환
y_true_normal_unscaled = inverse_transform(y_true_normal_valid, scaler)
y_pred_normal_unscaled = inverse_transform(y_pred_normal_valid, scaler)

In [19]:
# 전체 센서 메트릭 (Unscaled)
mae_all_unscaled = mean_absolute_error(y_true_all_unscaled, y_pred_all_unscaled)
rmse_all_unscaled = np.sqrt(mean_squared_error(y_true_all_unscaled, y_pred_all_unscaled))
nonzero_all_unscaled = y_true_all_unscaled != 0
mape_all_unscaled = np.mean(np.abs(
    (y_true_all_unscaled[nonzero_all_unscaled] - y_pred_all_unscaled[nonzero_all_unscaled]) 
    / y_true_all_unscaled[nonzero_all_unscaled]
)) * 100 if nonzero_all_unscaled.any() else 0.0
smape_all_unscaled = smape(y_true_all_unscaled.flatten(), y_pred_all_unscaled.flatten())

# 고속도로 센서 메트릭 (Unscaled)
mae_highway_unscaled = mean_absolute_error(y_true_highway_unscaled, y_pred_highway_unscaled)
rmse_highway_unscaled = np.sqrt(mean_squared_error(y_true_highway_unscaled, y_pred_highway_unscaled))
nonzero_highway_unscaled = y_true_highway_unscaled != 0
mape_highway_unscaled = np.mean(np.abs(
    (y_true_highway_unscaled[nonzero_highway_unscaled] - y_pred_highway_unscaled[nonzero_highway_unscaled]) 
    / y_true_highway_unscaled[nonzero_highway_unscaled]
)) * 100 if nonzero_highway_unscaled.any() else 0.0
smape_highway_unscaled = smape(y_true_highway_unscaled.flatten(), y_pred_highway_unscaled.flatten())

# 일반도로 센서 메트릭 (Unscaled)
mae_normal_unscaled = mean_absolute_error(y_true_normal_unscaled, y_pred_normal_unscaled)
rmse_normal_unscaled = np.sqrt(mean_squared_error(y_true_normal_unscaled, y_pred_normal_unscaled))
nonzero_normal_unscaled = y_true_normal_unscaled != 0
mape_normal_unscaled = np.mean(np.abs(
    (y_true_normal_unscaled[nonzero_normal_unscaled] - y_pred_normal_unscaled[nonzero_normal_unscaled]) 
    / y_true_normal_unscaled[nonzero_normal_unscaled]
)) * 100 if nonzero_normal_unscaled.any() else 0.0
smape_normal_unscaled = smape(y_true_normal_unscaled.flatten(), y_pred_normal_unscaled.flatten())

print(f"\n=== Original Scale Metrics (Excluding Interpolated) ===")
print(f"{'Metric':<10} {'All Sensors':>15} {'Highway':>15} {'Normal Road':>15}")
print("-" * 60)
print(f"{'MAE':<10} {mae_all_unscaled:>15.4f} {mae_highway_unscaled:>15.4f} {mae_normal_unscaled:>15.4f}")
print(f"{'RMSE':<10} {rmse_all_unscaled:>15.4f} {rmse_highway_unscaled:>15.4f} {rmse_normal_unscaled:>15.4f}")
print(f"{'MAPE (%)':<10} {mape_all_unscaled:>15.2f} {mape_highway_unscaled:>15.2f} {mape_normal_unscaled:>15.2f}")
print(f"{'sMAPE (%)':<10} {smape_all_unscaled:>15.2f} {smape_highway_unscaled:>15.2f} {smape_normal_unscaled:>15.2f}")


=== Original Scale Metrics (Excluding Interpolated) ===
Metric         All Sensors         Highway     Normal Road
------------------------------------------------------------
MAE                27.5070        115.9320         25.7518
RMSE               58.1629        174.6772         53.3330
MAPE (%)        3460709.38     10049317.97      3329923.83
sMAPE (%)            69.01           30.89           69.77


In [20]:
# LSTM 결과 요약 출력 (전체 / 고속도로 / 일반도로 비교)
print("=" * 75)
print("LSTM Performance Comparison (Original Scale)")
print("=" * 75)
print(f"\n{'Metric':<10} {'All Sensors':>15} {'Highway':>15} {'Normal Road':>15}")
print("-" * 75)
print(f"{'MAE':<10} {mae_all_unscaled:>15.4f} {mae_highway_unscaled:>15.4f} {mae_normal_unscaled:>15.4f}")
print(f"{'RMSE':<10} {rmse_all_unscaled:>15.4f} {rmse_highway_unscaled:>15.4f} {rmse_normal_unscaled:>15.4f}")
print(f"{'MAPE (%)':<10} {mape_all_unscaled:>15.2f} {mape_highway_unscaled:>15.2f} {mape_normal_unscaled:>15.2f}")
print(f"{'sMAPE (%)':<10} {smape_all_unscaled:>15.2f} {smape_highway_unscaled:>15.2f} {smape_normal_unscaled:>15.2f}")
print("=" * 75)

print(f"\n=== Data Points Summary ===")
print(f"All sensors valid points: {valid_mask_all.sum():,}")
print(f"Highway valid points: {valid_mask_highway.sum():,}")
print(f"Normal road valid points: {valid_mask_normal.sum():,}")

LSTM Performance Comparison (Original Scale)

Metric         All Sensors         Highway     Normal Road
---------------------------------------------------------------------------
MAE                27.5070        115.9320         25.7518
RMSE               58.1629        174.6772         53.3330
MAPE (%)        3460709.38     10049317.97      3329923.83
sMAPE (%)            69.01           30.89           69.77

=== Data Points Summary ===
All sensors valid points: 1,922,917
Highway valid points: 37,427
Normal road valid points: 1,885,490
