In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import math
import numpy as np

class TemporalEmbedding(nn.Module):
    def __init__(self, args):
        super(TemporalEmbedding, self).__init__()
        # Define embeddings in this order:
        self.args=args
        self.d_model=self.args.d_model
        self.weekday_emd = nn.Embedding(7, self.d_model)   # 1-7 -> 0-6
        self.hour_emd = nn.Embedding(24, self.d_model)      # 0-23
        self.minute_emd = nn.Embedding(60, self.d_model)    # 0-59
        self.weekend_emd = nn.Embedding(8, self.d_model)       #0 - 6  (0 is workday, 1 is weekend, 2 is yd 3 is cj 4 is qm 5 is 51 6 is dw 7 is gq)
        self.day_emd = nn.Embedding(31, self.d_model)       # 1-31 -> 0-30
        self.month_emd = nn.Embedding(12, self.d_model)     # 1-12 -> 0-11
        
    def forward(self, temp_feat):
        # Extract features in init order: [weekday, time_hour, time_minute, day_type, time_day, time_month]
        weekday = temp_feat[:,:, 0].long()   # Convert to long for embedding
        hour = temp_feat[:,:, 1].long()
        minute = temp_feat[:,:, 2].long()
        weekend = temp_feat[:,:, 3].long()
        day = temp_feat[:,:, 4].long()
        month = temp_feat[:,:, 5].long()

        # Generate embeddings
        weekday_emb = self.weekday_emd(weekday)
        hour_emb = self.hour_emd(hour)
        minute_emb = self.minute_emd(minute)
        peak_emb = self.weekend_emd(weekend)
        day_emb = self.day_emd(day)
        month_emb = self.month_emd(month)

        # Combine embeddings by summation
        combined_emb = weekday_emb + hour_emb + minute_emb + peak_emb + day_emb + month_emb
        return combined_emb

class CityEmbedding(nn.Module):
    def __init__(self, args):
        super(CityEmbedding, self).__init__()
        self.args = args
        self.city_list = self.args.city_list
        self.d_model = self.args.d_model
        self.city_embedding = nn.Embedding(len(self.city_list), self.d_model)
        self.city_idx_map = {city: idx for idx, city in enumerate(self.city_list)}  

    def forward(self, citys):
        # Map city names to indices
        city_indices = torch.tensor([self.city_idx_map[int(city.item())] for city in citys], dtype=torch.long).to(citys.device)
        
        # Get embeddings for the city indices
        city_embeddings = self.city_embedding(city_indices)
        return city_embeddings
    
class ValueEmbedding(nn.Module):
    def __init__(self, args):
        super(ValueEmbedding, self).__init__()
        self.args = args
        self.d_model = self.args.d_model
        self.enc_in = self.args.enc_in
        self.value_embedding = nn.Linear(self.enc_in, self.d_model)
        self.mask_token = nn.Parameter(torch.randn(1, 1, self.d_model))  # 随机初始化
        
    def forward(self, flow_feats, mask_matrix_expanded):
        x = self.value_embedding(flow_feats)
        return x

class StaticEmbedding(nn.Module):
    def __init__(self, args):
        super(StaticEmbedding, self).__init__()
        self.args = args
        self.d_model = self.args.d_model
        self.static_emb_in = self.args.static_emb_in
        self.projector = nn.Linear(self.static_emb_in, self.d_model)
    
    def forward(self, static_feat):
        static_feat = F.normalize(static_feat, p=2, dim=-1)
        return self.projector(static_feat)
    
from transformers import AutoModel
from transformers import GPT2Model, GPT2LMHeadModel, GPT2Config
class ST_LLM(nn.Module):
    def __init__(self, args):
        super(ST_LLM, self).__init__()
        self.args = args
        self.d_model = self.args.d_model
        # 初始化嵌入层（需传入相关参数）
        self.temp_embedding = TemporalEmbedding(self.args)
        self.city_embedding = CityEmbedding(self.args)
        self.value_embedding = ValueEmbedding(self.args)
        self.static_embedding = StaticEmbedding(self.args)
        # 加载 Qwen 模型并提取 Transformer 层
        self.gpt2_config = GPT2Config.from_pretrained('%s/gpt2' % self.args.root_path)
        self.gpt2 = GPT2Model.from_pretrained('%s/gpt2' % self.args.root_path, config=self.gpt2_config)
        
        self.decoder_pred = nn.Sequential(
            nn.Linear(self.d_model, args.enc_in)
        )
        self.mask_ratio = 0.5

    
    def forward_finetune(self, batch_flow, batch_static_feat, batch_time_features, batch_city_id):
        B, L, C = batch_flow.shape
        value_embedding = self.value_embedding(batch_flow, None) #[b,l,d]
        time_embedding = self.temp_embedding(batch_time_features) #[b,l,d]
        city_embedding = self.city_embedding(batch_city_id).unsqueeze(1) #[b,1,d]
        static_embedding = self.static_embedding(batch_static_feat).unsqueeze(1)#[b,1,d]
        
        masked_x = time_embedding + value_embedding       
        masked_x = torch.concat([city_embedding, static_embedding, masked_x], dim=-2)#[b,l+2,d]
        
        for block in self.gpt2.h:
            masked_x = block(masked_x)[0]

        # final layer norm
        masked_x = self.gpt2.ln_f(masked_x)
        return masked_x[:, -1, :]

class ST_LLM_finetune(nn.Module):
    def __init__(self, args, backbone):
        super(ST_LLM_finetune, self).__init__()
        self.args = args
        
        self.backbone = backbone
        
        for param in self.backbone.parameters():
            param.requires_grad = False

        # # 解冻 backbone 的最后 2 个 transformer block
        # # 假设 self.backbone.gpt2.h 是一个 ModuleList 或 list，可索引
        # for block in self.backbone.gpt2.h[-2:]:
        #     for p in block.parameters():
        #         p.requires_grad = True

        # # 可选：解冻最后的 layernorm（ln_f），有助于稳定微调
        # if hasattr(self.backbone.gpt2, "ln_f"):
        #     for p in self.backbone.gpt2.ln_f.parameters():
        #         p.requires_grad = True
        
        self.d_model = self.args.d_model
        # 768 = 512 + 256
        self.decoder_pred = nn.Sequential(*[ 
            nn.Linear(self.d_model, self.d_model//2),
            nn.GELU(),
            nn.Linear(self.d_model//2, self.d_model//4),
            nn.GELU(),
            nn.Linear(self.d_model//4, self.d_model//8),
            nn.GELU(),
            nn.Linear(self.d_model//8, 1)
        ])
        
    
    def forward(self, batch_flow, batch_static_feat, batch_time_features, batch_city_id):
        emb = self.backbone.forward_finetune(batch_flow, batch_static_feat, batch_time_features, batch_city_id)#[b, d]
        pred = self.decoder_pred(emb)
        return pred
    


import os 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
import warnings
import calendar
import datetime

from sklearn.model_selection import train_test_split
from datetime import datetime, timedelta 
from torch.utils.data import Dataset, DataLoader
from sklearn import preprocessing
from tqdm.notebook import tqdm
from odps import ODPS
import warnings

warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.6f' % x)

import multiprocessing
n_process = multiprocessing.cpu_count()

data_columns = ['sample_id', 'traffic_number', 'adcode', 'time_feat', 'dym_feat_feat', 'emb']

time_feat_col = 'time_feat'
dynamic_feat_col = 'dym_feat_feat' 
static_feat_col = 'emb' 
city_col = 'adcode'
label_col = 'traffic_number'

def collate_fn(data):
    "" 
    ""
    df_base = pd.DataFrame(data, columns=data_columns)

    time_list = []
    dynamic_feature_list = []
    static_feature_list = []
    city_list = []
    label_list = []

    for time_feat, dynamic_feat, static_feat, city, label in zip(df_base[time_feat_col].values.tolist(), 
                                                                 df_base[dynamic_feat_col].values.tolist(),
                                                                 df_base[static_feat_col].values.tolist(), 
                                                                 df_base[city_col].values.tolist(), 
                                                                 df_base[label_col].values.tolist()
                                                                ):
        # tensor getted
        time_feat_tensor = torch.from_numpy(np.asarray([[int(i) for i in x.split(' ')] for x in time_feat.split(';')] )) #L * 6
        dynamic_feat_tensor = torch.from_numpy(np.asarray([[round(float(i), 6) for i in x.split(' ')] for x in dynamic_feat.split(';')] )) # L * 5
        static_feat_tnesor = torch.from_numpy(np.asarray( [round(float(i), 6) for i in static_feat.split(',')] )) # 128
        city_tensor = torch.from_numpy(np.asarray(int(city)))  # 1
        label_tensor = torch.from_numpy(np.asarray(float(label)))  # 1

        # tesnoe append
        time_list.append(time_feat_tensor)
        dynamic_feature_list.append(dynamic_feat_tensor)
        static_feature_list.append(static_feat_tnesor)
        city_list.append(city_tensor)
        label_list.append(label_tensor)
    # tesnor mgrge
    time_feat = torch.stack(time_list)
    dynamic_feat = torch.stack(dynamic_feature_list)
    static_feat = torch.stack(static_feature_list)
    city_feat = torch.stack(city_list)
    label = torch.stack(label_list).unsqueeze(-1)
    
#     time_feat[:, :, 3] = torch.where(time_feat[:, :, 3] > 0, torch.ones_like(time_feat[:, :, 3]), time_feat[:, :, 3])
#     print(time_feat.shape, dynamic_feat.shape, static_feat.shape, city_feat.shape)
    return dynamic_feat, static_feat, time_feat, city_feat, label


import os
import torch
import torch.nn as nn
import logging
import traceback
import numpy as np
from tqdm import tqdm
# import wandb

from torch.utils.data import Dataset, DataLoader

import torch.distributed as dist 
import torch.backends.cudnn as cudnn
from torch.nn.parallel import DistributedDataParallel as DDP

from transformers import get_cosine_schedule_with_warmup

# 将 utils 所在目录添加到 Python 路径
import sys
from pathlib import Path
sys.path.append('/home/xuyongchao.xyc/notebook/test/gpt_pretrain_sft')

from utils.odps_table import OdpsTableDataset
from utils.utils import EarlyStopping
from utils.utils import save_loss_image, save_losses_to_csv, plot_tensor_distribution
from utils.utils import make_log_dir, init_logger, plot_tensor_distribution
from utils.odps_op import get_df, execute_sql_df, load_to_tables, delete_partiton


def get_data(args, ep=0, if_train=1):
    os.environ["ACCESS_ID"] = "LTAI5tRd4jERQur4o5NEFRc9"
    os.environ["ACCESS_KEY"] = "uulSnBT6vY7QQbmvs8ZQTnZ7hQXrjz"
    slice_id = int(os.environ.get('RANK', 0))
    slice_count = int(os.environ.get('WORLD_SIZE', 1))
    if if_train==1:
        odps_table="odps://autonavi_traffic_brain/tables/{}/epoch={}".format(args.train_table, ep%5)
        print('odps_table is ', odps_table)
            
        def convert_sample_to_features(sample, **kwargs):
            features = sample
            return features
            
        data_set = OdpsTableDataset(odps_table, slice_id, slice_count)

        data_loader = torch.utils.data.DataLoader(
                        data_set,
                        batch_size=args.batch_size,
                        sampler=None,
                        shuffle=False,
                        num_workers=args.num_workers,
                        drop_last=False,
                        pin_memory=True,
                        prefetch_factor=8,
                        collate_fn=lambda x: collate_fn(x))
    elif if_train==2:
        odps_table="odps://autonavi_traffic_brain/tables/{}".format(args.test_table)
        print('odps_table is ', odps_table)

        data_set = OdpsTableDataset(odps_table, slice_id, slice_count)

        data_loader = torch.utils.data.DataLoader(
                        data_set,
                        batch_size=args.test_batch_size,
                        sampler=None,
                        shuffle=False,
                        num_workers=args.test_num_workers,
                        drop_last=False,
                        pin_memory=True,
                        prefetch_factor=1,
                        collate_fn=lambda x: collate_fn(x))
    elif if_train==3:
        odps_table="odps://autonavi_traffic_brain/tables/{}".format(args.oot_table)
        print('odps_table is ', odps_table)

        data_set = OdpsTableDataset(odps_table, slice_id, slice_count)

        data_loader = torch.utils.data.DataLoader(
                        data_set,
                        batch_size=args.test_batch_size,
                        sampler=None,
                        shuffle=False,
                        num_workers=args.test_num_workers,
                        drop_last=False,
                        pin_memory=True,
                        prefetch_factor=1,
                        collate_fn=lambda x: collate_fn(x))
        
    return data_set, data_loader

2025-10-23 15:56:13,218  INFO /opt/conda/envs/python3.10/lib/python3.10/site-packages/lake/__init__.py:23]  init AILake env...
INFO:lake:init AILake env...
2025-10-23 15:56:13,220  INFO /opt/conda/envs/python3.10/lib/python3.10/site-packages/lake/__init__.py:37]  find libpangudfs_pangu_client.so.1 in the path /opt/conda/envs/python3.10.13/lib/python3.10/site-packages/
INFO:lake:find libpangudfs_pangu_client.so.1 in the path /opt/conda/envs/python3.10.13/lib/python3.10/site-packages/
2025-10-23 15:56:13,222  INFO /opt/conda/envs/python3.10/lib/python3.10/site-packages/lake/__init__.py:37]  find libfslib_plugin_pangu.so in the path /opt/conda/envs/python3.10.13/lib/python3.10/site-packages/
INFO:lake:find libfslib_plugin_pangu.so in the path /opt/conda/envs/python3.10.13/lib/python3.10/site-packages/
2025-10-23 15:56:13,302  INFO /opt/conda/envs/python3.10/lib/python3.10/site-packages/lake/__init__.py:69]  init AILake env done.
INFO:lake:init AILake env done.


2025-10-23 15:56:13,325 - MDL: INFO - ModelHubClient:  The model_hub_client is currently initialized without using a configuration file.
 If you are uploading a model, you need to set the environment variable _NEBULA_MODEL to the MOS version you wish to upload.
 If the initialization fails with a prompt requiring user_id, you need to set the environment variable _NEBULA_USER_ID to your employee number.


In [2]:
from torch import optim
import time
from tqdm import tqdm
import logging


def vali(model, vali_loader, criterion, device):
    total_loss, num = 0, 0
    preds, trues = [], []
    model.eval()
    with torch.no_grad():
        for i,batch in enumerate(tqdm(vali_loader)):
            batch_flow, batch_static_feat, batch_time_features, batch_city_id, batch_label = batch
            # Move data to GPU
            batch_flow = batch_flow[:, :, :args.enc_in].float().to(device)

            batch_static_feat = batch_static_feat.float().to(device)
            batch_time_features = batch_time_features.to(device)
            batch_city_id = batch_city_id.to(device)
            batch_label = batch_label.float().to(device)
            
            outputs = torch.clamp(model(batch_flow, batch_static_feat, batch_time_features, batch_city_id), min = 0)
            
            loss = criterion(outputs, batch_label)
            
            num += batch_flow.shape[0]
            
            total_loss += loss.item() * batch_flow.shape[0]
            
            preds.append(outputs.detach().cpu().numpy())
            trues.append(batch_label.detach().cpu().numpy())
        
        logging.info("vali avg loss is ={:.4f}".format(total_loss / num))

    return  total_loss/num, preds, trues


def train(args, device):
    backbone = ST_LLM(args).to(device)

    checkpoint_path = os.path.join(args.pretrain_checkpoint_path, args.pretrain_pth)  #预训练参数
    print('load checkpoint:', checkpoint_path)
    checkpoint = torch.load(checkpoint_path, map_location=device)  

    #恢复模型和优化器状态
    backbone.load_state_dict(checkpoint['model_state_dict'])
    # print(backbone)
    
    model = ST_LLM_finetune(args, backbone).to(device)
    
    model_optim = optim.Adam(model.parameters(), lr=args.learning_rate)
    
    start_epoch = 0

    sft_ckpt_path = os.path.join(args.checkpoints_path, '%s_best.pth' % args.task_name)
    
    if os.path.exists(sft_ckpt_path):
        sft_ckpt = torch.load(sft_ckpt_path, map_location=device)  
        model.load_state_dict(sft_ckpt['model_state_dict'])
        start_epoch = sft_ckpt['epoch']
        model_optim.load_state_dict(sft_ckpt['optimizer_state_dict'])
    
    criterion = nn.MSELoss() #nn.SmoothL1Loss(beta=0.5)
    
    time_now = time.time()
    
    early_stopping = EarlyStopping(args, verbose=True)  
    
    # 记录训练开始信息
    logging.info(f"Starting training with {args.epochs} epochs")
    logging.info(f"Learning rate: {args.learning_rate}")
    
    _, test_loader = get_data(args, ep=0, if_train=2)
    _, oot_loader = get_data(args, ep=0, if_train=3)

    
    for epoch in range(start_epoch, args.epochs):
        dataset, train_loader = get_data(args, ep=0)
        
        iter_count, train_loss, num = 0, 0, 0
        
        model.train()

        epoch_time = time.time()
        
        for i, batch in enumerate(train_loader):
            model_optim.zero_grad()

            batch_flow, batch_static_feat, batch_time_features, batch_city_id, batch_label = batch
            batch_flow = batch_flow[:, 1:, :args.enc_in].float().to(device)
            batch_static_feat = batch_static_feat.float().to(device) #(N,D)
            batch_time_features = batch_time_features[:, 1:, :].to(device)#(N,L,D)
            batch_city_id = batch_city_id.to(device) #(N,1)
            batch_label = batch_label.float().to(device) #(N,1)
                        
            outputs = model(batch_flow, batch_static_feat, batch_time_features, batch_city_id)
            loss = criterion(outputs, batch_label)
            train_loss += loss.item() * batch_flow.shape[0]
            num += batch_flow.float().shape[0]
            
            loss.backward()
            
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
            
            model_optim.step()

            if iter_count % 200 == 0:
                for name, param in model.named_parameters():
                    if param.grad is not None:
                        print(f"{name} | Grad Mean: {param.grad.abs().mean().item():.4f}")
                    
                print("Model Output Min/Max:", outputs.min().item(), outputs.max().item())
                print("Label Min/Max:", batch_label.min().item(), batch_label.max().item())
                logging.info("{} batch train loss is {:.4f}. train avg loss is ={:.4f}".format(iter_count, loss.item(), train_loss / num))
            
            iter_count += 1
            
        avg_train_loss = train_loss / num
        #test_loss, _, _ = vali(model, test_loader, criterion, device)
        oot_loss, _, _ = vali(model, oot_loader, criterion, device)
        test_loss = oot_loss
        epoch_duration = time.time() - epoch_time
        # 记录epoch信息
        epoch_log = (
            f"Epoch: {epoch+1}/{args.epochs} | "
            f"Time: {epoch_duration:.2f}s | "
            f"Train Loss: {avg_train_loss:.7f} | "
            f"Test Loss: {test_loss:.7f} | "
            f"OOT Loss: {oot_loss:.7f}"
        )
        logging.info(epoch_log)
        
        early_stopping(oot_loss, model, model_optim, epoch+1)
        
        if early_stopping.early_stop:
            stop_msg = "Early stopping triggered at epoch {}".format(epoch+1)
            logging.info(stop_msg)
            print(stop_msg)  # 确保控制台也显示
            break

    
    return model

In [3]:
import os
import argparse
parser = argparse.ArgumentParser(description='LLM')

parser.add_argument('--task_name', type=str, required=False, default='raod_cross_1min_flow_pretrian', help='task_name')

parser.add_argument('--batch_size', type=int, required=False, default=128, help='batch_size')
parser.add_argument('--learning_rate', type=float, required=False, default=2e-4, help='learning_rate')
parser.add_argument('--epochs', type=int, required=False, default=10, help='epochs')
parser.add_argument('--patience', type=int, required=False, default=7, help='patience')
parser.add_argument('--mask_rate', type=float, required=False, default=0.5, help='mask_rate')
parser.add_argument('--num_workers', type=int, required=False, default=8, help='num_workers')

parser.add_argument('--test_batch_size', type=int, required=False, default=128, help='test_batch_size')
parser.add_argument('--test_num_workers', type=int, required=False, default=8, help='test_num_workers')

parser.add_argument('--train_table', type=str, required=False, default='tb_inter_pretrain_data_all_feat_ep', help='train_table')
parser.add_argument('--test_table', type=str, required=False, default='tb_inter_pretrain_data_all_feat_ep_test', help='test_table')
parser.add_argument('--oot_table', type=str, required=False, default='tb_inter_pretrain_data_all_feat_ep_oot', help='test_table')


parser.add_argument('--root_path', type=str, required=False, 
                    default='/data/oss_bucket_0/tmp/xuyongchao.xyc/road_cross_flow_pretrain', help='root_path')
parser.add_argument('--checkpoints_path', type=str, required=False, 
                    default='/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/sft', help='checkpints_path')
parser.add_argument('--pretrain_checkpoint_path', type=str, required=False, 
                    default='/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/pretrain', help='checkpints_path')

parser.add_argument('--pretrain_pth', type=str, required=False, 
                    default='raod_cross_1min_flow_pretrian_ep4.pth', help='pretrian_pth')

parser.add_argument('--enc_in', type=int, required=False, default=5, help='enc_in')
parser.add_argument('--d_model', type=int, required=False, default=768, help='d_model')
parser.add_argument('--static_emb_in', type=int, required=False, default=128, help='static_emb_in')

# args =  parser.parse_args()

args =  parser.parse_args(args=[])

args.batch_size=15000
args.learning_rate = 1e-4
args.epochs = 100
args.patience = 7
args.mask_rate=0.5
args.num_workers=2
args.test_batch_size=15000
args.test_num_workers=2


# unclean + traffic_number<300 + 高德覆盖
args.train_table='tb_inter_sft_train_data_ep_v5'
args.test_table='tb_inter_sft_test_data_v5'
args.oot_table='tb_inter_sft_oot_data_v5'

# unclean
# args.train_table='tb_inter_sft_train_data_ep_v3'
# args.test_table='tb_inter_sft_test_data_v3'
# args.oot_table='tb_inter_sft_oot_data_v3'

args.root_path='/data/oss_bucket_0/tmp/xuyongchao.xyc/road_cross_flow_pretrain'

new = 1

if new == 1:
    args.city_list = [320800,321000,321200,321300,330500,330800,331000,350200,110000,120000,130100,130200,130900,140100,140200,150100,150200,150400,150600,150800,210100,210200,210500,220100,220200,230100,230300,310000,320100,320200,320300,320400,320500,320600,320900,321100,330100,330200,330300,330400,330600,330700,330900,331100,340100,340200,350100,350500,350600,360100,360400,360500,370100,370200,370300,370400,370600,371100,371300,371500,371600,410100,410300,410500,410600,410800,411000,411300,420100,420200,420500,421000,421200,430100,430900,440100,440300,440400,440500,440600,440700,440800,441300,441500,441800,441900,442000,450100,450200,450300,450700,451100,460100,460200,460400,500000,510100,510300,510400,510500,510600,510700,510900,511000,511100,511300,511400,511500,511700,512000,513300,520100,520500,530100,532600,532900,540100,610100,610700,620100,620300,620400,620900,621000,630100,640100,650100]
    args.checkpoints_path='/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/sft/unclean_fsd_new'
    args.pretrain_checkpoint_path = '/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/pretrain_new/5kw_data'
    args.pretrain_pth = 'raod_cross_1min_flow_pretrian_5.pth'
elif new == 2:
    args.city_list = [320800,321000,321200,321300,330500,330800,331000,350200,110000,120000,130100,130200,130900,140100,140200,150100,150200,150400,150600,150800,210100,210200,210500,220100,220200,230100,230300,310000,320100,320200,320300,320400,320500,320600,320900,321100,330100,330200,330300,330400,330600,330700,330900,331100,340100,340200,350100,350500,350600,360100,360400,360500,370100,370200,370300,370400,370600,371100,371300,371500,371600,410100,410300,410500,410600,410800,411000,411300,420100,420200,420500,421000,421200,430100,430900,440100,440300,440400,440500,440600,440700,440800,441300,441500,441800,441900,442000,450100,450200,450300,450700,451100,460100,460200,460400,500000,510100,510300,510400,510500,510600,510700,510900,511000,511100,511300,511400,511500,511700,512000,513300,520100,520500,530100,532600,532900,540100,610100,610700,620100,620300,620400,620900,621000,630100,640100,650100]
    args.checkpoints_path='/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/sft/unclean_fsd_cl'
    args.pretrain_checkpoint_path = '/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/pretrain_cl'
    args.pretrain_pth = 'raod_cross_1min_flow_mask_cl_pretrian_ep4.pth'
elif new == 3:
    args.city_list = [320800,321000,321200,321300,330500,330800,331000,350200,110000,120000,130100,130200,130900,140100,140200,150100,150200,150400,150600,150800,210100,210200,210500,220100,220200,230100,230300,310000,320100,320200,320300,320400,320500,320600,320900,321100,330100,330200,330300,330400,330600,330700,330900,331100,340100,340200,350100,350500,350600,360100,360400,360500,370100,370200,370300,370400,370600,371100,371300,371500,371600,410100,410300,410500,410600,410800,411000,411300,420100,420200,420500,421000,421200,430100,430900,440100,440300,440400,440500,440600,440700,440800,441300,441500,441800,441900,442000,450100,450200,450300,450700,451100,460100,460200,460400,500000,510100,510300,510400,510500,510600,510700,510900,511000,511100,511300,511400,511500,511700,512000,513300,520100,520500,530100,532600,532900,540100,610100,610700,620100,620300,620400,620900,621000,630100,640100,650100]
    args.checkpoints_path='/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/sft/v6_fsd_reshape'
    args.pretrain_checkpoint_path = '/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/pretrain_reshape'
    args.pretrain_pth = 'raod_cross_1min_flow_pretrian_6.pth'
else:
    args.city_list = [110000, 330100, 330700, 370100, 350200]
    args.checkpoints_path='/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/sft/unclean_fsd'  # 5city
    args.pretrain_checkpoint_path = '/data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/pretrain'
    args.pretrain_pth = 'raod_cross_1min_flow_pretrian_ep5.pth'

args.enc_in=1
args.d_model=768
args.static_emb_in=128

args.task_name='%s_sft' % args.pretrain_pth.split('.')[0]

# 'raod_cross_1min_flow_pretrian_ep1.pth'
# 'llm_gpt2_backbone_5_citys_0.1.pth'

def main():
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu' #'cuda:0'
    project_name = "ST-LLM-SFT"
    
    log_dir = make_log_dir()
    init_logger(log_dir)
    logging.info(f"Using device: {device}")

    try:
        model = train(args, device)
    except KeyboardInterrupt:
        logging.info("\nSFTing interrupted by user.")
    finally:
        logging.info(f"Saving losses to {log_dir}.")
        logging.info(f"Finishing training.")


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        logging.error("\n" + traceback.format_exc())

INFO:root:Using device: cuda:0
2025-10-23 15:57:08,846 - rank0 - INFO - Using device: cuda:0


2025-10-23 15:57:08,850 - MDL: INFO - ModelHubClient:  The model_hub_client is currently initialized without using a configuration file.
 If you are uploading a model, you need to set the environment variable _NEBULA_MODEL to the MOS version you wish to upload.
 If the initialization fails with a prompt requiring user_id, you need to set the environment variable _NEBULA_USER_ID to your employee number.
load checkpoint: /data/oss_bucket_0/tmp/xuyongchao.xyc/checkpoints/pretrain_new/5kw_data/raod_cross_1min_flow_pretrian_5.pth


INFO:root:Starting training with 100 epochs
2025-10-23 15:57:38,491 - rank0 - INFO - Starting training with 100 epochs
INFO:root:Learning rate: 0.0001
2025-10-23 15:57:38,500 - rank0 - INFO - Learning rate: 0.0001
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_test_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_test_data_v5', 'table partition': None}
2025-10-23 15:57:38,509 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_test_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_test_data_v5', 'table partition': None}


odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_test_data_v5


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023155738b6111b2124d1f069target project=autonavi_traffic_brain table=tb_inter_sft_test_data_v5 partition_spec=None>
2025-10-23 15:57:39,269 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=20251023155738b6111b2124d1f069target project=autonavi_traffic_brain table=tb_inter_sft_test_data_v5 partition_spec=None>
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 15:57:39,568 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}


total row_count:214770
odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510231557391ada1c0b21b04b92target project=autonavi_traffic_brain table=tb_inter_sft_oot_data_v5 partition_spec=None>
2025-10-23 15:57:39,983 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=202510231557391ada1c0b21b04b92target project=autonavi_traffic_brain table=tb_inter_sft_oot_data_v5 partition_spec=None>
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 15:57:40,376 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}


total row_count:214770
odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102315574044151b2124cf4e49target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 15:57:41,418 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=2025102315574044151b2124cf4e49target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 15:57:42,487 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 15:57:42,526 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023155742943a272123d4b1

decoder_pred.0.weight | Grad Mean: 0.0014
decoder_pred.0.bias | Grad Mean: 0.0037
decoder_pred.2.weight | Grad Mean: 0.0072
decoder_pred.2.bias | Grad Mean: 0.0177
decoder_pred.4.weight | Grad Mean: 0.0148
decoder_pred.4.bias | Grad Mean: 0.1018
decoder_pred.6.weight | Grad Mean: 0.1942
decoder_pred.6.bias | Grad Mean: 4.0761
Model Output Min/Max: -0.09069769829511642 0.27698180079460144
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 18.8492. train avg loss is =30.1369
2025-10-23 17:05:58,036 - rank0 - INFO - 200 batch train loss is 18.8492. train avg loss is =30.1369


decoder_pred.0.weight | Grad Mean: 0.0023
decoder_pred.0.bias | Grad Mean: 0.0021
decoder_pred.2.weight | Grad Mean: 0.0031
decoder_pred.2.bias | Grad Mean: 0.0021
decoder_pred.4.weight | Grad Mean: 0.0031
decoder_pred.4.bias | Grad Mean: 0.0037
decoder_pred.6.weight | Grad Mean: 0.0680
decoder_pred.6.bias | Grad Mean: 0.1060
Model Output Min/Max: -0.20272600650787354 79.30084991455078
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 17:19:09,336 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 17:19:09,334 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510231719094f3c272123bed715target project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102317205974101b2124ddabb6target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 17:20:59,929 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=2025102317205974101b2124ddabb6target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 17:21:00,640 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 17:21:00,642 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023172101c53b272124007b

decoder_pred.0.weight | Grad Mean: 0.0021
decoder_pred.0.bias | Grad Mean: 0.0040
decoder_pred.2.weight | Grad Mean: 0.0034
decoder_pred.2.bias | Grad Mean: 0.0037
decoder_pred.4.weight | Grad Mean: 0.0041
decoder_pred.4.bias | Grad Mean: 0.0059
decoder_pred.6.weight | Grad Mean: 0.0623
decoder_pred.6.bias | Grad Mean: 0.1573
Model Output Min/Max: -0.2880746126174927 67.42263793945312
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 17.0696. train avg loss is =19.3211
2025-10-23 17:45:33,428 - rank0 - INFO - 200 batch train loss is 17.0696. train avg loss is =19.3211


decoder_pred.0.weight | Grad Mean: 0.0028
decoder_pred.0.bias | Grad Mean: 0.0030
decoder_pred.2.weight | Grad Mean: 0.0048
decoder_pred.2.bias | Grad Mean: 0.0025
decoder_pred.4.weight | Grad Mean: 0.0054
decoder_pred.4.bias | Grad Mean: 0.0033
decoder_pred.6.weight | Grad Mean: 0.1167
decoder_pred.6.bias | Grad Mean: 0.0922
Model Output Min/Max: -0.6092530488967896 95.69090270996094
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 17:57:42,461 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 17:57:42,464 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510231757424f3c272123c4d24ctarget project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023180419f73a272123ed92f5target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 18:04:19,688 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=20251023180419f73a272123ed92f5target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 18:04:20,170 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 18:04:20,172 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023180420f73a272123ed93

decoder_pred.0.weight | Grad Mean: 0.0029
decoder_pred.0.bias | Grad Mean: 0.0051
decoder_pred.2.weight | Grad Mean: 0.0050
decoder_pred.2.bias | Grad Mean: 0.0049
decoder_pred.4.weight | Grad Mean: 0.0051
decoder_pred.4.bias | Grad Mean: 0.0066
decoder_pred.6.weight | Grad Mean: 0.0773
decoder_pred.6.bias | Grad Mean: 0.1732
Model Output Min/Max: -0.8303700685501099 90.72480773925781
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.9064. train avg loss is =18.7868
2025-10-23 18:38:20,185 - rank0 - INFO - 200 batch train loss is 16.9064. train avg loss is =18.7868


decoder_pred.0.weight | Grad Mean: 0.0029
decoder_pred.0.bias | Grad Mean: 0.0027
decoder_pred.2.weight | Grad Mean: 0.0034
decoder_pred.2.bias | Grad Mean: 0.0017
decoder_pred.4.weight | Grad Mean: 0.0050
decoder_pred.4.bias | Grad Mean: 0.0017
decoder_pred.6.weight | Grad Mean: 0.1249
decoder_pred.6.bias | Grad Mean: 0.0287
Model Output Min/Max: -1.197485089302063 84.0482406616211
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 18:50:24,949 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 18:50:24,953 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102318502520161b2124f3be5dtarget project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510231856352d171b2124f79b79target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 18:56:35,420 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=202510231856352d171b2124f79b79target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 18:56:36,126 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 18:56:36,127 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102318563683161b2124e38e

decoder_pred.0.weight | Grad Mean: 0.0028
decoder_pred.0.bias | Grad Mean: 0.0058
decoder_pred.2.weight | Grad Mean: 0.0059
decoder_pred.2.bias | Grad Mean: 0.0050
decoder_pred.4.weight | Grad Mean: 0.0079
decoder_pred.4.bias | Grad Mean: 0.0059
decoder_pred.6.weight | Grad Mean: 0.1642
decoder_pred.6.bias | Grad Mean: 0.1426
Model Output Min/Max: -0.7433791756629944 94.4826431274414
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.7797. train avg loss is =18.6102
2025-10-23 19:17:01,527 - rank0 - INFO - 200 batch train loss is 16.7797. train avg loss is =18.6102


decoder_pred.0.weight | Grad Mean: 0.0035
decoder_pred.0.bias | Grad Mean: 0.0032
decoder_pred.2.weight | Grad Mean: 0.0055
decoder_pred.2.bias | Grad Mean: 0.0027
decoder_pred.4.weight | Grad Mean: 0.0071
decoder_pred.4.bias | Grad Mean: 0.0032
decoder_pred.6.weight | Grad Mean: 0.2087
decoder_pred.6.bias | Grad Mean: 0.0854
Model Output Min/Max: -0.7314090728759766 103.07990264892578
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 19:22:13,085 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 19:22:13,089 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510231922134e80382124a80a88target project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102319261153181b2124fbb619target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 19:26:11,956 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=2025102319261153181b2124fbb619target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 19:26:12,338 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 19:26:12,340 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510231926125e80382124a266

decoder_pred.0.weight | Grad Mean: 0.0029
decoder_pred.0.bias | Grad Mean: 0.0051
decoder_pred.2.weight | Grad Mean: 0.0056
decoder_pred.2.bias | Grad Mean: 0.0047
decoder_pred.4.weight | Grad Mean: 0.0066
decoder_pred.4.bias | Grad Mean: 0.0052
decoder_pred.6.weight | Grad Mean: 0.1223
decoder_pred.6.bias | Grad Mean: 0.1257
Model Output Min/Max: -2.0127151012420654 72.48096466064453
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.9784. train avg loss is =18.5079
2025-10-23 19:40:33,381 - rank0 - INFO - 200 batch train loss is 16.9784. train avg loss is =18.5079


decoder_pred.0.weight | Grad Mean: 0.0027
decoder_pred.0.bias | Grad Mean: 0.0031
decoder_pred.2.weight | Grad Mean: 0.0052
decoder_pred.2.bias | Grad Mean: 0.0025
decoder_pred.4.weight | Grad Mean: 0.0060
decoder_pred.4.bias | Grad Mean: 0.0028
decoder_pred.6.weight | Grad Mean: 0.1513
decoder_pred.6.bias | Grad Mean: 0.0740
Model Output Min/Max: -1.1853156089782715 92.31761169433594
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 20:05:27,506 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 20:05:27,507 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510232005288c82382124e6aad2target project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102320094067d81c0b239f8274target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 20:09:40,474 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=2025102320094067d81c0b239f8274target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 20:09:40,912 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 20:09:40,915 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102320094113171b2124ffb4

decoder_pred.0.weight | Grad Mean: 0.0033
decoder_pred.0.bias | Grad Mean: 0.0054
decoder_pred.2.weight | Grad Mean: 0.0060
decoder_pred.2.bias | Grad Mean: 0.0048
decoder_pred.4.weight | Grad Mean: 0.0081
decoder_pred.4.bias | Grad Mean: 0.0059
decoder_pred.6.weight | Grad Mean: 0.1994
decoder_pred.6.bias | Grad Mean: 0.1492
Model Output Min/Max: -0.45897528529167175 94.56583404541016
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.8246. train avg loss is =18.4735
2025-10-23 21:08:30,778 - rank0 - INFO - 200 batch train loss is 16.8246. train avg loss is =18.4735


decoder_pred.0.weight | Grad Mean: 0.0032
decoder_pred.0.bias | Grad Mean: 0.0062
decoder_pred.2.weight | Grad Mean: 0.0053
decoder_pred.2.bias | Grad Mean: 0.0064
decoder_pred.4.weight | Grad Mean: 0.0064
decoder_pred.4.bias | Grad Mean: 0.0079
decoder_pred.6.weight | Grad Mean: 0.1136
decoder_pred.6.bias | Grad Mean: 0.2091
Model Output Min/Max: -1.210370659828186 104.14425659179688
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 21:39:39,202 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 21:39:39,204 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510232139394580382124be56c8target project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023214340f63b272123f06ffetarget project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 21:43:40,503 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=20251023214340f63b272123f06ffetarget project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 21:43:40,964 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 21:43:40,966 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023214341ee161b21250743

decoder_pred.0.weight | Grad Mean: 0.0030
decoder_pred.0.bias | Grad Mean: 0.0024
decoder_pred.2.weight | Grad Mean: 0.0039
decoder_pred.2.bias | Grad Mean: 0.0016
decoder_pred.4.weight | Grad Mean: 0.0050
decoder_pred.4.bias | Grad Mean: 0.0016
decoder_pred.6.weight | Grad Mean: 0.1467
decoder_pred.6.bias | Grad Mean: 0.0387
Model Output Min/Max: -0.37304121255874634 83.34292602539062
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 17.1480. train avg loss is =18.4096
2025-10-23 22:40:56,909 - rank0 - INFO - 200 batch train loss is 17.1480. train avg loss is =18.4096


decoder_pred.0.weight | Grad Mean: 0.0032
decoder_pred.0.bias | Grad Mean: 0.0054
decoder_pred.2.weight | Grad Mean: 0.0060
decoder_pred.2.bias | Grad Mean: 0.0039
decoder_pred.4.weight | Grad Mean: 0.0076
decoder_pred.4.bias | Grad Mean: 0.0040
decoder_pred.6.weight | Grad Mean: 0.2252
decoder_pred.6.bias | Grad Mean: 0.1011
Model Output Min/Max: -0.5262460708618164 131.2394256591797
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 22:53:49,394 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-23 22:53:49,392 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510232253509e82382124fea8fftarget project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023225635663c272124000a91target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-23 22:56:35,270 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=20251023225635663c272124000a91target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 22:56:36,392 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-23 22:56:36,394 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251023225636ff3a272123f661

decoder_pred.0.weight | Grad Mean: 0.0032
decoder_pred.0.bias | Grad Mean: 0.0043
decoder_pred.2.weight | Grad Mean: 0.0065
decoder_pred.2.bias | Grad Mean: 0.0043
decoder_pred.4.weight | Grad Mean: 0.0072
decoder_pred.4.bias | Grad Mean: 0.0044
decoder_pred.6.weight | Grad Mean: 0.1249
decoder_pred.6.bias | Grad Mean: 0.0993
Model Output Min/Max: -0.4912557005882263 83.0332260131836
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.7446. train avg loss is =18.3411
2025-10-24 00:15:39,369 - rank0 - INFO - 200 batch train loss is 16.7446. train avg loss is =18.3411


decoder_pred.0.weight | Grad Mean: 0.0027
decoder_pred.0.bias | Grad Mean: 0.0034
decoder_pred.2.weight | Grad Mean: 0.0036
decoder_pred.2.bias | Grad Mean: 0.0033
decoder_pred.4.weight | Grad Mean: 0.0033
decoder_pred.4.bias | Grad Mean: 0.0038
decoder_pred.6.weight | Grad Mean: 0.0878
decoder_pred.6.bias | Grad Mean: 0.0998
Model Output Min/Max: -0.84548020362854 122.96324920654297
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 00:38:39,258 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 00:38:39,256 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510240038398b82382125158d59target project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510240044286f181b21252aa046target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-24 00:44:29,070 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=202510240044286f181b21252aa046target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 00:44:29,567 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 00:44:29,569 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251024004429acafb10b21f4a4

decoder_pred.0.weight | Grad Mean: 0.0033
decoder_pred.0.bias | Grad Mean: 0.0042
decoder_pred.2.weight | Grad Mean: 0.0038
decoder_pred.2.bias | Grad Mean: 0.0038
decoder_pred.4.weight | Grad Mean: 0.0028
decoder_pred.4.bias | Grad Mean: 0.0040
decoder_pred.6.weight | Grad Mean: 0.0653
decoder_pred.6.bias | Grad Mean: 0.0986
Model Output Min/Max: -0.7924730777740479 84.44371032714844
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.7885. train avg loss is =18.2838
2025-10-24 02:58:37,341 - rank0 - INFO - 200 batch train loss is 16.7885. train avg loss is =18.2838


decoder_pred.0.weight | Grad Mean: 0.0036
decoder_pred.0.bias | Grad Mean: 0.0066
decoder_pred.2.weight | Grad Mean: 0.0058
decoder_pred.2.bias | Grad Mean: 0.0046
decoder_pred.4.weight | Grad Mean: 0.0062
decoder_pred.4.bias | Grad Mean: 0.0045
decoder_pred.6.weight | Grad Mean: 0.1830
decoder_pred.6.bias | Grad Mean: 0.1117
Model Output Min/Max: -0.21135380864143372 116.22444915771484
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 04:41:43,758 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 04:41:43,761 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251024044144eb6b1d0b2230fe9dtarget project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510240450114d15272105d88a66target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-24 04:50:11,436 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=202510240450114d15272105d88a66target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 04:50:12,160 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 04:50:12,162 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102404501290823821252e15

decoder_pred.0.weight | Grad Mean: 0.0040
decoder_pred.0.bias | Grad Mean: 0.0068
decoder_pred.2.weight | Grad Mean: 0.0051
decoder_pred.2.bias | Grad Mean: 0.0054
decoder_pred.4.weight | Grad Mean: 0.0055
decoder_pred.4.bias | Grad Mean: 0.0057
decoder_pred.6.weight | Grad Mean: 0.0951
decoder_pred.6.bias | Grad Mean: 0.1487
Model Output Min/Max: -0.2783942222595215 79.64734649658203
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.7311. train avg loss is =18.2670
2025-10-24 05:28:47,477 - rank0 - INFO - 200 batch train loss is 16.7311. train avg loss is =18.2670


decoder_pred.0.weight | Grad Mean: 0.0034
decoder_pred.0.bias | Grad Mean: 0.0065
decoder_pred.2.weight | Grad Mean: 0.0059
decoder_pred.2.bias | Grad Mean: 0.0063
decoder_pred.4.weight | Grad Mean: 0.0054
decoder_pred.4.bias | Grad Mean: 0.0068
decoder_pred.6.weight | Grad Mean: 0.1013
decoder_pred.6.bias | Grad Mean: 0.1791
Model Output Min/Max: -0.49105870723724365 126.81024169921875
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 05:40:53,226 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 05:40:53,224 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251024054053a6181b21254ab16ftarget project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510240544495b181b21254971bdtarget project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-24 05:44:50,091 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=202510240544495b181b21254971bdtarget project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 05:44:50,461 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 05:44:50,465 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510240544502d64b00b23fa96

decoder_pred.0.weight | Grad Mean: 0.0036
decoder_pred.0.bias | Grad Mean: 0.0027
decoder_pred.2.weight | Grad Mean: 0.0033
decoder_pred.2.bias | Grad Mean: 0.0009
decoder_pred.4.weight | Grad Mean: 0.0026
decoder_pred.4.bias | Grad Mean: 0.0004
decoder_pred.6.weight | Grad Mean: 0.0958
decoder_pred.6.bias | Grad Mean: 0.0007
Model Output Min/Max: -1.089592456817627 109.37799835205078
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.9380. train avg loss is =18.2236
2025-10-24 06:45:30,164 - rank0 - INFO - 200 batch train loss is 16.9380. train avg loss is =18.2236


decoder_pred.0.weight | Grad Mean: 0.0037
decoder_pred.0.bias | Grad Mean: 0.0038
decoder_pred.2.weight | Grad Mean: 0.0054
decoder_pred.2.bias | Grad Mean: 0.0018
decoder_pred.4.weight | Grad Mean: 0.0052
decoder_pred.4.bias | Grad Mean: 0.0016
decoder_pred.6.weight | Grad Mean: 0.1710
decoder_pred.6.bias | Grad Mean: 0.0419
Model Output Min/Max: -0.19333156943321228 113.72914123535156
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 07:31:24,103 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 07:31:24,105 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251024073124c7afb10b222df6aatarget project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510240736519282382125476e91target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-24 07:36:51,763 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=202510240736519282382125476e91target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 07:36:52,500 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 07:36:52,502 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102407365365803821251044

decoder_pred.0.weight | Grad Mean: 0.0029
decoder_pred.0.bias | Grad Mean: 0.0070
decoder_pred.2.weight | Grad Mean: 0.0056
decoder_pred.2.bias | Grad Mean: 0.0071
decoder_pred.4.weight | Grad Mean: 0.0069
decoder_pred.4.bias | Grad Mean: 0.0081
decoder_pred.6.weight | Grad Mean: 0.1656
decoder_pred.6.bias | Grad Mean: 0.2159
Model Output Min/Max: -0.31215664744377136 92.5507583618164
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.2890. train avg loss is =18.1684
2025-10-24 08:43:49,991 - rank0 - INFO - 200 batch train loss is 16.2890. train avg loss is =18.1684


decoder_pred.0.weight | Grad Mean: 0.0039
decoder_pred.0.bias | Grad Mean: 0.0061
decoder_pred.2.weight | Grad Mean: 0.0060
decoder_pred.2.bias | Grad Mean: 0.0043
decoder_pred.4.weight | Grad Mean: 0.0069
decoder_pred.4.bias | Grad Mean: 0.0041
decoder_pred.6.weight | Grad Mean: 0.1926
decoder_pred.6.bias | Grad Mean: 0.0994
Model Output Min/Max: -0.21804431080818176 148.21206665039062
Label Min/Max: 0.0 117.0


  0%|          | 0/15 [00:00<?, ?it/s]INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 09:12:24,334 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
2025-10-24 09:12:24,336 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_oot_data_v5 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_oot_data_v5', 'table partition': None}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=20251024091225db3b2721245aa879target project=autonavi_traffic_brain table=tb_inter

odps_table is  odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0


INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=202510240917566dd81c0b245aac69target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>
2025-10-24 09:17:56,708 - rank0 - INFO - Tunnel session created: <TableDownloadSession id=202510240917566dd81c0b245aac69target project=autonavi_traffic_brain table=tb_inter_sft_train_data_ep_v5 partition_spec=epoch=0>


total row_count:4540037


INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 09:17:57,219 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:root:odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
2025-10-24 09:17:57,223 - rank0 - INFO - odps://autonavi_traffic_brain/tables/tb_inter_sft_train_data_ep_v5/epoch=0 -> {'odps project': 'autonavi_traffic_brain', 'table name': 'tb_inter_sft_train_data_ep_v5', 'table partition': 'epoch=0'}
INFO:odps.tunnel.tabletunnel:Tunnel session created: <TableDownloadSession id=2025102409175706d91c0b248b1d

decoder_pred.0.weight | Grad Mean: 0.0038
decoder_pred.0.bias | Grad Mean: 0.0028
decoder_pred.2.weight | Grad Mean: 0.0042
decoder_pred.2.bias | Grad Mean: 0.0021
decoder_pred.4.weight | Grad Mean: 0.0028
decoder_pred.4.bias | Grad Mean: 0.0026
decoder_pred.6.weight | Grad Mean: 0.0417
decoder_pred.6.bias | Grad Mean: 0.0774
Model Output Min/Max: -0.24663478136062622 86.72576141357422
Label Min/Max: 0.0 101.0


INFO:root:200 batch train loss is 16.6857. train avg loss is =18.1381
2025-10-24 10:36:30,667 - rank0 - INFO - 200 batch train loss is 16.6857. train avg loss is =18.1381


decoder_pred.0.weight | Grad Mean: 0.0035
decoder_pred.0.bias | Grad Mean: 0.0065
decoder_pred.2.weight | Grad Mean: 0.0062
decoder_pred.2.bias | Grad Mean: 0.0054
decoder_pred.4.weight | Grad Mean: 0.0070
decoder_pred.4.bias | Grad Mean: 0.0052
decoder_pred.6.weight | Grad Mean: 0.1957
decoder_pred.6.bias | Grad Mean: 0.1318
Model Output Min/Max: -1.8269513845443726 126.75784301757812
Label Min/Max: 0.0 117.0
