In [1]:
import pickle

import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from metr.components.metr_imc.traffic_data import TrafficData
from metr.utils import PathConfig
from numpy.lib.npyio import NpzFile
from scipy.stats import norm
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import acf, adfuller
from scipy.stats import zscore
from scipy.stats import entropy, norm
import pycatch22
import h5py
from tqdm import tqdm

import os.path

In [2]:
PATH_CONF = PathConfig.from_yaml("../config.yaml")
FILTERED_PATH_CONF = PathConfig.from_yaml("../config_brits.yaml")
# 체크포인트 경로 설정 (DCRNN)
CHECKPOINT_PATH = "../traffic_imc/output/dcrnn/BRITS/model.ckpt"
if not os.path.exists(CHECKPOINT_PATH):
    raise FileNotFoundError(f"Checkpoint path {CHECKPOINT_PATH} does not exist.")

TRAFFIC_DATA_PATH = PATH_CONF.metr_imc_path
NODELINK_SHAPE_PATH = PATH_CONF.nodelink_link_path
ADJ_MX_PATH = PATH_CONF.adj_mx_path
METADATA_PATH = PATH_CONF.metadata_path

FILTERED_DATA_PATH = FILTERED_PATH_CONF.metr_imc_path
FILTERED_NODELINK_SHAPE_PATH = FILTERED_PATH_CONF.metr_shapefile_path
FILTERED_ADJ_MX_PATH = FILTERED_PATH_CONF.adj_mx_path
FILTERED_METADATA_PATH = FILTERED_PATH_CONF.metadata_path

In [3]:
# Set up visualization parameters
plt.rcParams["font.family"] = "AppleGothic"  # Use AppleGothic for better font rendering
plt.rcParams["axes.unicode_minus"] = False  # Prevent negative sign rendering issues

In [4]:
raw = TrafficData.import_from_hdf(TRAFFIC_DATA_PATH)
df = raw.data
df.iloc[:, :5]

Unnamed: 0,1630020000,1630168900,1640010500,1640021100,1680010502
2023-01-01 00:00:00,,,,,
2023-01-01 01:00:00,,,,,
2023-01-01 02:00:00,,,,,
2023-01-01 03:00:00,,,,,
2023-01-01 04:00:00,,,,,
...,...,...,...,...,...
2026-01-15 19:00:00,264.0,9.0,0.0,0.0,
2026-01-15 20:00:00,175.0,5.0,0.0,0.0,
2026-01-15 21:00:00,129.0,3.0,0.0,0.0,
2026-01-15 22:00:00,85.0,3.0,0.0,0.0,


In [5]:
imputed_raw = TrafficData.import_from_hdf(FILTERED_DATA_PATH)
imputed_df = imputed_raw.data
imputed_df.iloc[:, :5].head()

Unnamed: 0,1630020000,1630168900,1640010500,1640021100,1680055400
2023-01-26 00:00:00,0.0,0.0,204.0,0.0,10.779705
2023-01-26 01:00:00,0.0,0.0,137.0,0.0,1.620499
2023-01-26 02:00:00,0.0,0.0,83.0,0.0,0.326149
2023-01-26 03:00:00,0.0,0.0,57.0,0.0,2.690292
2023-01-26 04:00:00,0.0,0.0,136.0,0.0,-0.243927


In [6]:
metadata_raw = pd.read_hdf(FILTERED_METADATA_PATH)
metadata_raw

Unnamed: 0,LINK_ID,ROAD_NAME,LANES,ROAD_RANK,ROAD_TYPE,MAX_SPD,REST_VEH
15,1630020000,한나루로,2,104,000,50,0
17,1630168900,석정로,2,104,000,50,0
18,1640010500,경원대로,5,104,000,60,0
21,1640021100,먼우금로,3,104,000,30,0
50,1680055400,봉수대로,3,104,000,60,0
...,...,...,...,...,...,...,...
15935,1650278500,남동대로,4,104,000,50,0
15936,1650278600,남동대로,4,104,000,50,0
15972,1680257100,중봉대로,2,104,000,60,0
15973,1680257900,중봉대로,2,104,000,60,0


In [7]:
metadata_raw["ROAD_RANK"].unique()

<ArrowStringArray>
['104', '107', '101', '103', '105', '106']
Length: 6, dtype: str

In [8]:
highways_info = metadata_raw[metadata_raw['ROAD_RANK'] == '101']
highways_list = highways_info['LINK_ID']
highways_list

116      1630167000
2221     1630174501
2223     1630174601
2224     1630174701
2225     1630174801
2226     1630174901
2227     1630175001
2228     1630175101
2229     1630175201
2230     1630175301
2231     1630175401
3719     1610079200
3720     1610079300
4546     1610080400
4547     1610080600
7213     1640054700
8246     1640316500
8278     1640320200
8279     1640320300
8281     1650004100
8282     1640320400
8298     1650003901
8310     1650004200
8311     1650004300
8312     1650004400
8313     1650004500
8315     1650004600
8316     1650004900
8317     1650005000
9241     1650314500
9242     1650314900
9489     1650350800
9490     1650350900
9524     1660003701
9525     1660003702
9526     1660003801
9528     1660003802
9529     1660003900
9530     1660004000
10140    1663132600
10331    1670003500
10334    1670002500
10339    1670002800
10344    1670003400
10353    1670004000
10363    1670004100
10997    1670222800
10999    1670222900
11055    1680008501
11056    1680008503


In [9]:
normal_road_info = metadata_raw[metadata_raw['ROAD_RANK'] != '101']
normal_road_list = normal_road_info['LINK_ID']
normal_road_list

15       1630020000
17       1630168900
18       1640010500
21       1640021100
50       1680055400
            ...    
15935    1650278500
15936    1650278600
15972    1680257100
15973    1680257900
15974    1680258100
Name: LINK_ID, Length: 1960, dtype: str

## DCRNN 모델 고속도로 센서 평가

학습된 DCRNN 모델을 불러와서 고속도로(ROAD_RANK='101') 센서에 대해서만 MAE, RMSE, MAPE를 계산합니다.

In [10]:
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error

from metr.components.adj_mx import AdjacencyMatrix
from metr.datasets.dcrnn import DCRNNSplitDataModule
from metr_val.models.dcrnn import DCRNNLightningModule

  from pkg_resources import DistributionNotFound, get_distribution
  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# 디바이스 설정
device = torch.device("mps") if torch.backends.mps.is_available() else \
         torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: mps


In [12]:
# Adjacency Matrix 로드
adj_mx_obj = AdjacencyMatrix.import_from_pickle(FILTERED_ADJ_MX_PATH)
adj_mx = adj_mx_obj.adj_mx
sensor_ids = adj_mx_obj.sensor_ids  # 순서가 지정된 센서 ID 리스트
n_vertex = adj_mx.shape[0]

print(f"Number of vertices (sensors): {n_vertex}")
print(f"Sensor IDs sample: {sensor_ids[:5]}")
print(f"Adjacency matrix shape: {adj_mx.shape}")

Number of vertices (sensors): 2015
Sensor IDs sample: ['1630020000', '1630168900', '1640010500', '1640021100', '1680055400']
Adjacency matrix shape: (2015, 2015)


In [13]:
# 데이터 모듈 설정 (DCRNN용)
# DCRNN 하이퍼파라미터 (dcrnn.py 참조)
SEQ_LEN = 12
HORIZON = 12
ADD_TIME_IN_DAY = True
ADD_DAY_IN_WEEK = False

data_module = DCRNNSplitDataModule(
    training_data_path=FILTERED_PATH_CONF.metr_imc_training_path,
    test_data_path=FILTERED_PATH_CONF.metr_imc_test_path,
    test_missing_path=FILTERED_PATH_CONF.metr_imc_test_missing_path,
    adj_mx_path=FILTERED_PATH_CONF.adj_mx_path,
    seq_len=SEQ_LEN,
    horizon=HORIZON,
    batch_size=64,
    num_workers=0,  # Notebook에서는 0 권장
    shuffle_training=False,
    train_val_split=0.8,
    add_time_in_day=ADD_TIME_IN_DAY,
    add_day_in_week=ADD_DAY_IN_WEEK,
)

# fit 단계에서 scaler 준비
data_module.setup("fit")
scaler = data_module.scaler

# test 단계 설정
data_module.setup("test")
print(f"Test dataset size: {len(data_module.test_dataset)}")
print(f"Input dimension: {data_module.input_dim}")
print(f"Output dimension: {data_module.output_dim}")

Training data split - Train: 19968 rows (80%), Val: 4992 rows (20%)
Test data loaded: 1104 rows
Test dataset size: 1081
Input dimension: 2
Output dimension: 1


In [14]:
# 체크포인트에서 DCRNN 모델 로드
# DCRNN은 첫 forward pass 시 동적으로 파라미터를 생성하므로 특별한 로딩 과정 필요

# 1. 기본 모델 생성 (동적 파라미터 없이)
model = DCRNNLightningModule.load_from_checkpoint(
    CHECKPOINT_PATH,
    adj_mx=adj_mx,
    scaler=scaler,
    map_location="cpu",
    strict=False,  # 동적 파라미터 로딩을 위해 필요
)

# 2. 더미 forward pass를 수행하여 동적 파라미터 생성
# 테스트 데이터의 첫 배치를 가져와서 forward pass 수행
model.eval()  # eval 모드에서는 curriculum learning이 사용되지 않음
with torch.no_grad():
    dummy_batch = next(iter(data_module.test_dataloader()))
    dummy_x = dummy_batch[0]  # (seq_len, batch_size, num_nodes * input_dim)
    _ = model(dummy_x)  # 이 과정에서 DCGRUCell의 동적 파라미터가 생성됨

# 3. 이제 동적 파라미터가 생성되었으므로 state_dict를 다시 로드
checkpoint = torch.load(CHECKPOINT_PATH, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["state_dict"], strict=True)  # strict=True로 변경하여 확인

# 모델 전체를 디바이스로 이동
model.to(device)
model.eval()
print(f"Model loaded from {CHECKPOINT_PATH}")

/Users/sbyim/Workspace/Python/songdo-traffic/notebooks/.venv/lib/python3.11/site-packages/lightning/pytorch/core/saving.py:197: Found keys that are not in the model state dict but in the checkpoint: ['model.encoder_model.dcgru_layers.0.gconv_weight_(330, 128)', 'model.encoder_model.dcgru_layers.0.gconv_biases_128', 'model.encoder_model.dcgru_layers.0.gconv_weight_(330, 64)', 'model.encoder_model.dcgru_layers.0.gconv_biases_64', 'model.encoder_model.dcgru_layers.1.gconv_weight_(640, 128)', 'model.encoder_model.dcgru_layers.1.gconv_biases_128', 'model.encoder_model.dcgru_layers.1.gconv_weight_(640, 64)', 'model.encoder_model.dcgru_layers.1.gconv_biases_64', 'model.decoder_model.dcgru_layers.0.gconv_weight_(325, 128)', 'model.decoder_model.dcgru_layers.0.gconv_biases_128', 'model.decoder_model.dcgru_layers.0.gconv_weight_(325, 64)', 'model.decoder_model.dcgru_layers.0.gconv_biases_64', 'model.decoder_model.dcgru_layers.1.gconv_weight_(640, 128)', 'model.decoder_model.dcgru_layers.1.gconv_

Model loaded from ../traffic_imc/output/dcrnn/BRITS/model.ckpt


In [15]:
# 고속도로 및 일반도로 센서의 인덱스 찾기 (sensor_id_to_idx 사용)
sensor_id_to_idx = adj_mx_obj.sensor_id_to_idx

# 고속도로 센서의 인덱스 추출 (명시적 매핑 사용)
highway_indices = [
    sensor_id_to_idx[str(sid)] 
    for sid in highways_list 
    if str(sid) in sensor_id_to_idx
]

# 일반도로 센서의 인덱스 추출
normal_road_indices = [
    sensor_id_to_idx[str(sid)] 
    for sid in normal_road_list 
    if str(sid) in sensor_id_to_idx
]

print(f"Total sensors: {len(sensor_id_to_idx)}")
print(f"Highway sensors: {len(highway_indices)}")
print(f"Normal road sensors: {len(normal_road_indices)}")

Total sensors: 2015
Highway sensors: 55
Normal road sensors: 1960


In [16]:
# 테스트 데이터에서 예측 수행
# DCRNN 입출력 형태:
# x: (seq_len, batch_size, num_nodes * input_dim)
# y: (horizon, batch_size, num_nodes * output_dim)
# y_hat: (horizon, batch_size, num_nodes * output_dim)

test_loader = data_module.test_dataloader()

all_y_true = []
all_y_pred = []
all_is_missing = []

with torch.no_grad():
    for batch in tqdm(test_loader):
        x, y, missing = batch
        x = x.to(device)
        y = y.to(device)
        
        # 모델 예측 (DCRNN forward)
        y_hat = model(x)
        
        # DCRNN 출력 형태: (horizon, batch, num_nodes * output_dim)
        # -> (batch, horizon, num_nodes) 로 변환
        batch_size = y.shape[1]
        num_nodes = n_vertex
        
        y_reshaped = y.permute(1, 0, 2).reshape(batch_size, HORIZON, num_nodes)
        y_hat_reshaped = y_hat.permute(1, 0, 2).reshape(batch_size, HORIZON, num_nodes)
        missing_reshaped = missing.permute(1, 0, 2)  # (batch, horizon, num_nodes)
        
        all_y_true.append(y_reshaped.cpu().numpy())
        all_y_pred.append(y_hat_reshaped.cpu().numpy())
        all_is_missing.append(missing_reshaped.cpu().numpy())

# 배열 연결: (total_samples, horizon, num_nodes)
y_true = np.concatenate(all_y_true, axis=0)
y_pred = np.concatenate(all_y_pred, axis=0)
is_missing = np.concatenate(all_is_missing, axis=0)

print(f"Predictions shape: {y_pred.shape}")
print(f"Ground truth shape: {y_true.shape}")
print(f"Missing mask shape: {is_missing.shape}")

100%|██████████| 17/17 [26:36<00:00, 93.90s/it]

Predictions shape: (1081, 12, 2015)
Ground truth shape: (1081, 12, 2015)
Missing mask shape: (1081, 12, 2015)





In [17]:
# 디버깅: y_true와 y_pred의 범위 확인
print("=== 값 범위 확인 (Scaled) ===")
print(f"y_true range: [{y_true.min():.4f}, {y_true.max():.4f}], mean: {y_true.mean():.4f}")
print(f"y_pred range: [{y_pred.min():.4f}, {y_pred.max():.4f}], mean: {y_pred.mean():.4f}")

# Inverse transform 후 값 확인
y_true_unscaled_debug = scaler.inverse_transform(y_true.reshape(-1, 1)).flatten()
y_pred_unscaled_debug = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

print("\n=== 값 범위 확인 (Unscaled) ===")
print(f"y_true_unscaled range: [{y_true_unscaled_debug.min():.4f}, {y_true_unscaled_debug.max():.4f}], mean: {y_true_unscaled_debug.mean():.4f}")
print(f"y_pred_unscaled range: [{y_pred_unscaled_debug.min():.4f}, {y_pred_unscaled_debug.max():.4f}], mean: {y_pred_unscaled_debug.mean():.4f}")

# Scaler 파라미터 확인
print("\n=== Scaler 파라미터 ===")
print(f"Scaler mean: {scaler.mean_[0]:.4f}")
print(f"Scaler scale (std): {scaler.scale_[0]:.4f}")

# 체크포인트에 저장된 scaler와 비교
if 'scaler' in checkpoint.get('hyper_parameters', {}):
    print("\n=== 체크포인트의 Scaler ===")
    print("Scaler가 hyper_parameters에 저장되어 있습니다.")
else:
    print("\n=== 체크포인트의 Scaler ===")
    print("Scaler가 hyper_parameters에 저장되어 있지 않습니다.")
    
# 모델의 scaler 확인
print(f"\n=== 모델의 Scaler ===")
if model.scaler is not None:
    print(f"Model scaler mean: {model.scaler.mean_[0]:.4f}")
    print(f"Model scaler scale: {model.scaler.scale_[0]:.4f}")
else:
    print("Model에 scaler가 없습니다.")

=== 값 범위 확인 (Scaled) ===
y_true range: [-2.4107, 13.8111], mean: -0.0719
y_pred range: [-1.4805, 12.0856], mean: -0.0512

=== 값 범위 확인 (Unscaled) ===
y_true_unscaled range: [-592.2729, 4617.8955], mean: 158.9160
y_pred_unscaled range: [-293.5198, 4063.6907], mean: 165.5388

=== Scaler 파라미터 ===
Scaler mean: 181.9943
Scaler scale (std): 321.1833

=== 체크포인트의 Scaler ===
Scaler가 hyper_parameters에 저장되어 있지 않습니다.

=== 모델의 Scaler ===
Model scaler mean: 181.9943
Model scaler scale: 321.1833


In [18]:
# 고속도로 센서만 필터링 (shape: total_samples, horizon, num_highway_sensors)
y_true_highway = y_true[:, :, highway_indices]
y_pred_highway = y_pred[:, :, highway_indices]
is_missing_highway = is_missing[:, :, highway_indices]

print(f"Highway predictions shape: {y_pred_highway.shape}")

# Flatten
y_true_flat = y_true_highway.flatten()
y_pred_flat = y_pred_highway.flatten()
is_missing_flat = is_missing_highway.flatten()

# Valid mask (보간되지 않은 원본 데이터만)
valid_mask = ~is_missing_flat

total_points = len(y_true_flat)
valid_points = valid_mask.sum()
interpolated_points = total_points - valid_points

print(f"\n=== Highway Sensor Statistics ===")
print(f"Total points: {total_points}")
print(f"Valid (original) points: {valid_points} ({valid_points/total_points*100:.1f}%)")
print(f"Interpolated points (excluded): {interpolated_points} ({interpolated_points/total_points*100:.1f}%)")

Highway predictions shape: (1081, 12, 55)

=== Highway Sensor Statistics ===
Total points: 713460
Valid (original) points: 448932 (62.9%)
Interpolated points (excluded): 264528 (37.1%)


In [19]:
# sMAPE 계산 함수
def smape(y_true, y_pred):
    """Symmetric Mean Absolute Percentage Error (sMAPE)"""
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    nonzero_denom = denominator != 0
    if not nonzero_denom.any():
        return 0.0
    return np.mean(np.abs(y_true[nonzero_denom] - y_pred[nonzero_denom]) / denominator[nonzero_denom]) * 100

# Scaled 메트릭 계산 (보간되지 않은 데이터만)
y_true_valid = y_true_flat[valid_mask]
y_pred_valid = y_pred_flat[valid_mask]

mae_scaled = mean_absolute_error(y_true_valid, y_pred_valid)
rmse_scaled = np.sqrt(mean_squared_error(y_true_valid, y_pred_valid))

# MAPE (0으로 나누기 방지)
nonzero_mask = y_true_valid != 0
mape_scaled = np.mean(np.abs(
    (y_true_valid[nonzero_mask] - y_pred_valid[nonzero_mask]) / y_true_valid[nonzero_mask]
)) * 100 if nonzero_mask.any() else 0.0

# sMAPE
smape_scaled = smape(y_true_valid, y_pred_valid)

print(f"\n=== Highway Sensors - Scaled Metrics (Excluding Interpolated) ===")
print(f"MAE:   {mae_scaled:.4f}")
print(f"RMSE:  {rmse_scaled:.4f}")
print(f"MAPE:  {mape_scaled:.2f}%")
print(f"sMAPE: {smape_scaled:.2f}%")


=== Highway Sensors - Scaled Metrics (Excluding Interpolated) ===
MAE:   0.5104
RMSE:  0.7509
MAPE:  1445.43%
sMAPE: 59.09%


In [20]:
# Unscaled (원본 스케일) 메트릭 계산
def inverse_transform(data, scaler):
    """Inverse transform scaled data back to original scale."""
    original_shape = data.shape
    flat_data = data.reshape(-1, 1)
    unscaled = scaler.inverse_transform(flat_data)
    return unscaled.reshape(original_shape)

# 원본 스케일로 변환 (shape: total_samples, horizon, num_highway_sensors)
y_true_unscaled = inverse_transform(y_true_highway, scaler)
y_pred_unscaled = inverse_transform(y_pred_highway, scaler)

# Flatten 및 valid mask 적용
y_true_unscaled_flat = y_true_unscaled.flatten()
y_pred_unscaled_flat = y_pred_unscaled.flatten()
y_true_unscaled_valid = y_true_unscaled_flat[valid_mask]
y_pred_unscaled_valid = y_pred_unscaled_flat[valid_mask]

# Unscaled 메트릭 계산
mae_unscaled = mean_absolute_error(y_true_unscaled_valid, y_pred_unscaled_valid)
rmse_unscaled = np.sqrt(mean_squared_error(y_true_unscaled_valid, y_pred_unscaled_valid))

# MAPE (0으로 나누기 방지)
nonzero_mask_unscaled = y_true_unscaled_valid != 0
mape_unscaled = np.mean(np.abs(
    (y_true_unscaled_valid[nonzero_mask_unscaled] - y_pred_unscaled_valid[nonzero_mask_unscaled]) 
    / y_true_unscaled_valid[nonzero_mask_unscaled]
)) * 100 if nonzero_mask_unscaled.any() else 0.0

# sMAPE
smape_unscaled = smape(y_true_unscaled_valid, y_pred_unscaled_valid)

print(f"\n=== Highway Sensors - Original Scale Metrics (Excluding Interpolated) ===")
print(f"MAE:   {mae_unscaled:.4f} vehicles/hour")
print(f"RMSE:  {rmse_unscaled:.4f} vehicles/hour")
print(f"MAPE:  {mape_unscaled:.2f}%")
print(f"sMAPE: {smape_unscaled:.2f}%")


=== Highway Sensors - Original Scale Metrics (Excluding Interpolated) ===
MAE:   163.9389 vehicles/hour
RMSE:  241.1818 vehicles/hour
MAPE:  56396700.00%
sMAPE: 40.61%


In [21]:
# 전체 / 고속도로 / 일반도로 비교

# === 전체 센서 === (shape: total_samples, horizon, num_nodes)
y_true_all_flat = y_true.flatten()
y_pred_all_flat = y_pred.flatten()
is_missing_all_flat = is_missing.flatten()
valid_mask_all = ~is_missing_all_flat

y_true_all_unscaled = inverse_transform(y_true, scaler).flatten()
y_pred_all_unscaled = inverse_transform(y_pred, scaler).flatten()
y_true_all_valid = y_true_all_unscaled[valid_mask_all]
y_pred_all_valid = y_pred_all_unscaled[valid_mask_all]

mae_all = mean_absolute_error(y_true_all_valid, y_pred_all_valid)
rmse_all = np.sqrt(mean_squared_error(y_true_all_valid, y_pred_all_valid))
nonzero_all = y_true_all_valid != 0
mape_all = np.mean(np.abs(
    (y_true_all_valid[nonzero_all] - y_pred_all_valid[nonzero_all]) / y_true_all_valid[nonzero_all]
)) * 100 if nonzero_all.any() else 0.0
smape_all = smape(y_true_all_valid, y_pred_all_valid)

# === 일반도로 센서 === (shape: total_samples, horizon, num_normal_sensors)
y_true_normal = y_true[:, :, normal_road_indices]
y_pred_normal = y_pred[:, :, normal_road_indices]
is_missing_normal = is_missing[:, :, normal_road_indices]

y_true_normal_flat = y_true_normal.flatten()
y_pred_normal_flat = y_pred_normal.flatten()
is_missing_normal_flat = is_missing_normal.flatten()
valid_mask_normal = ~is_missing_normal_flat

y_true_normal_unscaled = inverse_transform(y_true_normal, scaler).flatten()
y_pred_normal_unscaled = inverse_transform(y_pred_normal, scaler).flatten()
y_true_normal_valid = y_true_normal_unscaled[valid_mask_normal]
y_pred_normal_valid = y_pred_normal_unscaled[valid_mask_normal]

mae_normal = mean_absolute_error(y_true_normal_valid, y_pred_normal_valid)
rmse_normal = np.sqrt(mean_squared_error(y_true_normal_valid, y_pred_normal_valid))
nonzero_normal = y_true_normal_valid != 0
mape_normal = np.mean(np.abs(
    (y_true_normal_valid[nonzero_normal] - y_pred_normal_valid[nonzero_normal]) / y_true_normal_valid[nonzero_normal]
)) * 100 if nonzero_normal.any() else 0.0
smape_normal = smape(y_true_normal_valid, y_pred_normal_valid)

# === 결과 출력 ===
print("=" * 75)
print("DCRNN Performance Comparison (Original Scale)")
print("=" * 75)
print(f"\n{'Metric':<10} {'All Sensors':>15} {'Highway':>15} {'Normal Road':>15}")
print("-" * 75)
print(f"{'MAE':<10} {mae_all:>15.4f} {mae_unscaled:>15.4f} {mae_normal:>15.4f}")
print(f"{'RMSE':<10} {rmse_all:>15.4f} {rmse_unscaled:>15.4f} {rmse_normal:>15.4f}")
print(f"{'MAPE (%)':<10} {mape_all:>15.2f} {mape_unscaled:>15.2f} {mape_normal:>15.2f}")
print(f"{'sMAPE (%)':<10} {smape_all:>15.2f} {smape_unscaled:>15.2f} {smape_normal:>15.2f}")
print("=" * 75)

print(f"\n=== Data Points Summary ===")
print(f"All sensors valid points: {valid_mask_all.sum():,}")
print(f"Highway valid points: {valid_mask.sum():,}")
print(f"Normal road valid points: {valid_mask_normal.sum():,}")

DCRNN Performance Comparison (Original Scale)

Metric         All Sensors         Highway     Normal Road
---------------------------------------------------------------------------
MAE                53.1699        163.9389         50.9744
RMSE               99.3116        241.1818         94.3681
MAPE (%)       35087600.00     56396700.00     34665237.50
sMAPE (%)            87.30           40.61           88.22

=== Data Points Summary ===
All sensors valid points: 23,098,614
Highway valid points: 448,932
Normal road valid points: 22,649,682
