In [None]:
# data gen model training
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import wandb
from torch_geometric.data import Batch
from torch_geometric.loader import DataLoader
from stnet.stnet import SpatioTemporalModel
from utils.utils import ModelConfig, FocalLoss, train_model
from utils.dataset import AugmentedScenarioGraphDataset
from utils.dataset import ScenarioGraphDataset
from utils.dataset_utils import NODE_TYPE_MAP, EDGE_TYPE_MAP

if __name__ == '__main__':
    # 设备配置
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.set_float32_matmul_precision('high')

    # 滑动窗口配置
    window_size = 30
    step_size = 1
    num_classes = 7

    # 初始化配置
    config = ModelConfig(
        num_layers=3,
        num_features=12 + len(NODE_TYPE_MAP),
        hidden_dim=16,
        num_relations=8,
        edge_dim=8,
        num_epochs=200,
        num_classes=3,
        window_size=30,
        step_size=1,
        learning_rate=0.001,
        weight_decay=0.01,
        graph_num_head=4,
        pool_ratio=0.9,
        num_seed_points=1,
        lstm_hidden_dim=16,
        lstm_bidirectional=False,
        lstm_num_layers=1,
        fc_dropout=0.3,
        batch_size=16,
        gnn_query_dim=16,
        gnn_num_head=1
    )

    # initialize wandb
    wandb.init(project="spatio-temporal", config=config.__dict__)

    working_dir = "./"
    model_dir = f"{working_dir}/model"
    dataset_dir = "./dataset"

    # 数据集配置
    scene_datasets = {
        # "main_secondary": ['/main-secondary'],
        # "secondary_road": ["/secondary-road"],
        "motor": [
            "/secondary-road",
            '/main-secondary'
        ],
        # "ebike": ['/ebike'],
        # "total": [
        #     "/secondary-road",
        #     '/main-secondary',
        #     '/ebike'
        # ]
    }

    # 训练循环
    for scene_name, data_dirs in scene_datasets.items():
        print(f"\n=== Training Scene: {scene_name} ===")
        
        data_dirs = [f"{dataset_dir}\driving-scene-graph{d}" for d in data_dirs]
        
        # 数据集加载
        cache_path = f"{dataset_dir}/cache/{scene_name}_{window_size}_{step_size}_{num_classes}_dataset_pre_cache.pkl"
        generator_model_path = f"{working_dir}/model/data_aug/{scene_name}_{window_size}_{step_size}_{num_classes}_generator_model.pth"

        # 加载数据集
        dataset = ScenarioGraphDataset(data_dirs, config, device, cache_path)
        weights = dataset.compute_class_weights()
        
        # 划分训练集和验证集
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(dataset, 
                                                                    [train_size, val_size], 
                                                                    generator=torch.Generator().manual_seed(42))

        # 创建数据加载器
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

        # 模型初始化
        model = SpatioTemporalModel(config).to(device)
        optimizer = optim.AdamW(model.parameters(), lr=1e-2, weight_decay=1e-4)
        # criterion = FocalLoss(alpha=0.25, gamma=2)
        criterion = nn.CrossEntropyLoss(weight=weights.to(device))
        print(f"class weights: {weights}")

        # 开始训练
        train_model(
            model=model,
            train_loader=train_loader,
            val_loader=train_loader,
            optimizer=optimizer,
            criterion=criterion,
            device=device,
            config=config,
            checkpoint_dir=f"model/checkpoint/",
            bestmodel_dir=f"model/data_aug/",
            scene_name=scene_name,
            patience=15
        )



In [None]:
# 多卡训练
import os
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.distributed import DistributedSampler
from torch_geometric.loader import DataLoader
from stnet.stnet import SpatioTemporalModel
from utils.utils import ModelConfig, FocalLoss, train_model
from utils.dataset import ScenarioGraphDataset
from utils.dataset_utils import NODE_TYPE_MAP, EDGE_TYPE_MAP
from torch.nn.parallel import DistributedDataParallel as DDP

if "RANK" not in os.environ:
    os.environ["RANK"] = "0"
if "WORLD_SIZE" not in os.environ:
    os.environ["WORLD_SIZE"] = "1"
if "LOCAL_RANK" not in os.environ:
    os.environ["LOCAL_RANK"] = "0"
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "5678"

def setup_ddp():
    # 使用 NCCL 后端，LOCAL_RANK 环境变量必须已设置
    dist.init_process_group(backend="nccl")
    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank)
    return torch.device("cuda", local_rank), local_rank

if __name__ == '__main__':
    # 分布式设置
    device, local_rank = setup_ddp()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.set_float32_matmul_precision('high')

    # 滑动窗口配置
    window_size = 30
    step_size = 1
    num_classes = 7

    # 初始化配置
    config = ModelConfig(
        num_layers=3,
        num_features=12 + len(NODE_TYPE_MAP),
        hidden_dim=16,
        num_relations=8,
        edge_dim=8,
        num_epochs=200,
        num_classes=num_classes,
        window_size=window_size,
        step_size=step_size,
        learning_rate=0.001,
        weight_decay=0.01,
        graph_num_head=4,
        pool_ratio=0.9,
        num_seed_points=1,
        lstm_hidden_dim=16,
        lstm_bidirectional=False,
        lstm_num_layers=1,
        fc_dropout=0.3,
        batch_size=16,
        gnn_query_dim=16,
        gnn_num_head=1
    )

    working_dir = "./"
    model_dir = f"{working_dir}/model"
    dataset_dir = "/kaggle/input/driving-scene-graph/dataset"

    # 数据集配置
    scene_datasets = {
        "motor": [
            "/secondary-road",
            '/main-secondary'
        ],
        "ebike": ['/ebike'],
    }

    # 训练循环
    for scene_name, data_dirs in scene_datasets.items():
        print(f"\n=== Training Scene: {scene_name} ===")
        # initialize wandb
        import wandb
        wandb.init(project="spatio-temporal", config=config.__dict__)
        
        data_dirs = [f"{dataset_dir}{d}" for d in data_dirs]
        
        # 数据集加载
        cache_path = f"/kaggle/working/cache/{scene_name}_{window_size}_{step_size}_{num_classes}_dataset_pre_cache.pkl"
        generator_model_path = f"{working_dir}/model/data_aug/{scene_name}_{window_size}_{step_size}_{num_classes}_generator_model.pth"

        # 加载数据集
        dataset = ScenarioGraphDataset(data_dirs, config, device, cache_path)
        weights = dataset.compute_class_weights()
        
        # 划分训练集和验证集
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(
            dataset, [train_size, val_size], 
            generator=torch.Generator().manual_seed(42)
        )
        
        # 使用 DistributedSampler
        train_sampler = DistributedSampler(train_dataset)
        val_sampler = DistributedSampler(val_dataset)
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, sampler=train_sampler)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, sampler=val_sampler)

        # 模型初始化
        model = SpatioTemporalModel(config).to(device)
        model = DDP(model, device_ids=[local_rank], output_device=local_rank)
        optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
        # criterion = FocalLoss(alpha=0.25, gamma=2)
        criterion = nn.CrossEntropyLoss(weight=weights.to(device))
        print(f"class weights: {weights}")

        # 开始训练
        train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            optimizer=optimizer,
            criterion=criterion,
            device=device,
            config=config,
            checkpoint_dir=f"model/checkpoint/",
            bestmodel_dir=f"model/data_aug/",
            scene_name=scene_name,
            patience=15
        )
        
    # 结束时清理分布式进程组
    dist.destroy_process_group()

In [None]:
# grid search
from torch_geometric.nn import DataParallel
import os
import torch
import wandb
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.data import Batch
from torch_geometric.loader import DataLoader
from stnet.stnet import SpatioTemporalModel
from utils.utils import ModelConfig, FocalLoss, train_model
from utils.dataset import AugmentedScenarioGraphDataset
from utils.dataset_utils import NODE_TYPE_MAP, EDGE_TYPE_MAP


def train(config=None):
    # 设备配置
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.set_float32_matmul_precision('high')
    with wandb.init(config=None, mode="offline"):
        configW = wandb.config
        print(configW)  

        # 初始化配置
        config = ModelConfig(
            num_layers=configW.num_layers,
            num_features=12 + len(NODE_TYPE_MAP),
            hidden_dim=configW.hidden_dim,
            num_relations=8,
            edge_dim=8,
            num_epochs=5,
            num_classes=7,
            window_size=configW.window_size,
            step_size=1,
            learning_rate=configW.learning_rate,
            weight_decay=configW.weight_decay,
            graph_num_head=configW.graph_num_head,
            pool_ratio=configW.pool_ratio,
            num_seed_points=4,
            lstm_hidden_dim=configW.lstm_hidden_dim,
            lstm_bidirectional=configW.lstm_bidirectional,
            lstm_num_layers=configW.lstm_num_layers,
            fc_dropout=configW.fc_dropout,
            batch_size=16,
        )

        working_dir = "./"
        model_dir = f"{working_dir}/model"
        dataset_dir = "./dataset"

        # 数据集配置
        scene_datasets = {
            "motor": [
                "/secondary-road",
                '/main-secondary'
            ]
        }
        scene_name = "motor"
        data_dirs = scene_datasets[scene_name]
        
        print(f"\n=== Training Scene: {scene_name} ===")
        
        # 数据集路径配置
        data_dirs = [f"{dataset_dir}/driving-scene-graph{d}" for d in data_dirs]

        # 数据集加载
        cache_path = f"{dataset_dir}/cache/{scene_name}_{config.window_size}_{config.step_size}_{config.num_classes}_dataset_aug_cache.pkl"
        generator_model_path = f"{working_dir}/model/data_aug/{scene_name}_{30}_{1}_{2 if config.num_classes == 3 else 3}_{16}_{config.num_classes}_generator_model.pth"

        # 加载数据集
        dataset = AugmentedScenarioGraphDataset(
            root_dirs=data_dirs,
            window_size=config.window_size,
            step_size=config.step_size,
            generator_model_path=generator_model_path,
            node_feature_dim=config.num_features,
            device=device,
            cache_path=cache_path,
            num_classes=config.num_classes
        )
        weights = dataset.compute_class_weights()

        # 划分训练集和验证集
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(dataset, 
                                                                    [train_size, val_size], 
                                                                    generator=torch.Generator().manual_seed(0))

        # 创建数据加载器
        train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)

        # 模型初始化
        model = SpatioTemporalModel(config).to(device)
        optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
        criterion = nn.CrossEntropyLoss(weight=weights.to(device))

        # 开始训练
        train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            optimizer=optimizer,
            criterion=criterion,
            device=device,
            config=config,
            checkpoint_dir=f"{model_dir}/checkpoint",
            bestmodel_dir=f"{model_dir}/bestmodel",
            scene_name=scene_name,
            patience=15
        )

if __name__ == '__main__':
    import yaml
    with open("config/sweep.yaml", "r", encoding="utf-8") as file:
        config_dict = yaml.safe_load(file)
    # config_dict = {'method': 'bayes', 
    #                'name': 'hyperparameter-sweep-stnet', 
    #                'metric': {'name': 'validation_f1', 'goal': 'maximize'}, 
    #                'parameters': {'batch_size': 16, 'fc_dropout': 0.1, 'graph_dropout': 1, 'graph_num_head': 1, 'hidden_dim': 16, 'learning_rate': 0.001, 'lstm_bidirectional': True, 'lstm_hidden_dim': 16, 'lstm_num_layers': 1, 'num_classes': 3, 'num_layers': 2, 'num_seed_points': 2, 'pool_ratio': 0.5, 'step_size': 1, 'weight_decay': 0.0001, 'window_size': 10}}
    sweep_id = wandb.sweep(config_dict, project="pytorch-sweeps-demo")
    wandb.agent(sweep_id, train, count=1)

In [1]:
# model training
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import wandb
from torch_geometric.data import Batch
from torch_geometric.loader import DataLoader
from stnet.stnet import SpatioTemporalModel
from utils.utils import ModelConfig, FocalLoss, train_model
from utils.dataset import AugmentedScenarioGraphDataset
from utils.dataset import ScenarioGraphDataset
from utils.dataset_utils import NODE_TYPE_MAP, EDGE_TYPE_MAP

if __name__ == '__main__':
    # 设备配置
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.set_float32_matmul_precision('high')

    # 滑动窗口配置
    window_size = 30
    step_size = 1
    num_classes = 3

    # 初始化配置
    config = ModelConfig(
        num_layers=3,
        num_features=12 + len(NODE_TYPE_MAP),
        hidden_dim=16,
        num_relations=8,
        edge_dim=8,
        num_epochs=200,
        num_classes=num_classes,
        window_size=window_size,
        step_size=step_size,
        learning_rate=0.001,
        weight_decay=0.01,
        graph_num_head=4,
        pool_ratio=0.9,
        num_seed_points=1,
        lstm_hidden_dim=16,
        lstm_bidirectional=False,
        lstm_num_layers=1,
        fc_dropout=0.3,
        batch_size=16,
        gnn_query_dim=16,
        gnn_num_head=1
    )

    working_dir = "./"
    model_dir = f"{working_dir}/model"
    dataset_dir = "./dataset"

    # 数据集配置
    scene_datasets = {
        "motor": [
            "/secondary-road",
            '/main-secondary'
        ]
    }
    scene_name = "motor"
    data_dirs = scene_datasets[scene_name]
    
    print(f"\n=== Training Scene: {scene_name} ===")
    # initialize wandb
    wandb.init(project="stnet-model-train", config=config.__dict__, mode="offline")
    
    # 数据集路径配置
    data_dirs = [f"{dataset_dir}\driving-scene-graph{d}" for d in data_dirs]

    # 数据集加载
    cache_path = f"{dataset_dir}/cache/{scene_name}_{config.window_size}_{config.step_size}_{config.num_classes}_dataset_aug_cache.pkl"
    generator_model_path = f"{working_dir}/model/data_aug/{scene_name}_{30}_{1}_{2 if config.num_classes == 3 else 3}_{16}_{config.num_classes}_generator_model.pth"

    # 加载数据集
    dataset = AugmentedScenarioGraphDataset(
        root_dirs=data_dirs,
        window_size=config.window_size,
        step_size=config.step_size,
        generator_model_path=generator_model_path,
        node_feature_dim=config.num_features,
        device=device,
        cache_path=cache_path,
        num_classes=config.num_classes
    )
    weights = dataset.compute_class_weights()
    
    # 划分训练集和验证集
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, 
                                                                [train_size, val_size], 
                                                                generator=torch.Generator().manual_seed(0))

    # 创建数据加载器
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False)

    # 模型初始化
    model = SpatioTemporalModel(config).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    criterion = nn.CrossEntropyLoss(weight=weights.to(device))
    print(f"class weights: {weights}")

    # 开始训练
    train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        config=config,
        checkpoint_dir=f"{model_dir}/checkpoint",
        bestmodel_dir=f"{model_dir}/bestmodel",
        scene_name=scene_name,
        patience=30
    )


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.



=== Training Scene: motor ===


未找到缓存文件，重新加载数据...
Extracted num_layers: 2, hidden_dim: 16,num_classes: 3


RuntimeError: Error(s) in loading state_dict for SpatioTemporalModel:
	Missing key(s) in state_dict: "graph_encoder.feat_encoder.weight", "graph_encoder.feat_encoder.bias", "graph_encoder.feat_decoder.weight", "graph_encoder.feat_decoder.bias". 
	Unexpected key(s) in state_dict: "graph_encoder.feat_transform.weight", "graph_encoder.feat_transform.bias". 
	size mismatch for graph_encoder.convs.0.weight: copying a param with shape torch.Size([8, 16, 16]) from checkpoint, the shape in current model is torch.Size([8, 1, 16, 16]).
	size mismatch for graph_encoder.convs.1.weight: copying a param with shape torch.Size([8, 16, 16]) from checkpoint, the shape in current model is torch.Size([8, 1, 16, 16]).

In [9]:
from stnet.stnet import SpatioTemporalModel
from utils.utils import ModelConfig
from utils.dataset_utils import NODE_TYPE_MAP
# 初始化配置
config = ModelConfig(
    num_layers=3,
    num_features=12 + len(NODE_TYPE_MAP),
    hidden_dim=16,
    num_relations=8,
    edge_dim=8,
    num_epochs=200,
    num_classes=7,
    window_size=30,
    step_size=1,
    learning_rate=0.001,
    weight_decay=0.01,
    graph_num_head=4,
    pool_ratio=0.9,
    num_seed_points=1,
    lstm_hidden_dim=16,
    lstm_bidirectional=False,
    lstm_num_layers=1,
    fc_dropout=0.3,
    batch_size=16,
    gnn_query_dim=16,
    gnn_num_head=1,
    gnn_num_block=4
)
model = SpatioTemporalModel(config)
# 如果只想统计可训练的参数数量（即 requires_grad=True 的参数）
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"可训练参数数量：{trainable_params}")
print(model)

可训练参数数量：48569
SpatioTemporalModel(
  (graph_encoder): GraphEncoder(
    (convs): ModuleList(
      (0-2): 3 x RGATConv(256, 16, heads=1)
    )
    (norms): ModuleList(
      (0-2): 3 x GraphNorm(256)
    )
    (feat_encoder): Linear(in_features=16, out_features=256, bias=True)
    (feat_decoder): Linear(in_features=256, out_features=16, bias=True)
    (norms_input): GraphNorm(256)
    (pool): SAGPooling(GraphConv, 16, ratio=0.9, multiplier=1.0)
    (aggr): SetTransformerAggregation(16, num_seed_points=1, heads=4, layer_norm=False, dropout=0)
  )
  (temporal_attn): TemporalAttention(
    (query): Linear(in_features=16, out_features=16, bias=True)
    (key): Linear(in_features=16, out_features=16, bias=True)
    (value): Linear(in_features=16, out_features=16, bias=True)
  )
  (lstm): LSTM(16, 16, batch_first=True)
  (classifier): Sequential(
    (0): Linear(in_features=32, out_features=16, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=16, 

In [1]:
import pandas as pd
from itertools import product

# Define the grid of hyperparameters to try
param_grid = {
    'num_layers': [2, 3, 5],
    'hidden_dim': [16, 32],
    'lstm_num_layers': [1, 2, 3, 5],
    'lstm_hidden_dim': [16, 32],
    'graph_num_head': [2, 4],
}

results = []
# List of hyperparameter names to record
keys_to_keep = list(param_grid.keys())

# Loop over all combinations
for num_layers, hidden_dim, lstm_layers, lstm_hidden_dim, graph_num_head in product(
    param_grid['num_layers'],
    param_grid['hidden_dim'],
    param_grid['lstm_num_layers'],
    param_grid['lstm_hidden_dim'],
    param_grid['graph_num_head']
):
    config_temp = ModelConfig(
        num_layers=num_layers,
        num_features=12 + len(NODE_TYPE_MAP),
        hidden_dim=hidden_dim,
        num_relations=8,
        edge_dim=8,
        num_epochs=200,
        num_classes=3,
        window_size=30,
        step_size=1,
        learning_rate=0.001,
        weight_decay=0.01,
        graph_num_head=graph_num_head,
        pool_ratio=0.9,
        num_seed_points=4,
        lstm_hidden_dim=lstm_hidden_dim,
        lstm_bidirectional=False,
        lstm_num_layers=lstm_layers,
        fc_dropout=0.3,
        batch_size=16,
    )
    # Initialize model with the new config
    model_temp = SpatioTemporalModel(config_temp).to(device)
    param_count = sum(p.numel() for p in model_temp.parameters() if p.requires_grad)
    
    # Record only the grid search hyperparameters
    config_dict = {key: getattr(config_temp, key) for key in keys_to_keep}
    config_dict['model_params'] = param_count
    results.append(config_dict)

df = pd.DataFrame(results)
print("\n=== Grid Search Hyperparameters and Model Parameter Count ===")
df


NameError: name 'ModelConfig' is not defined