In [2]:
import os
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.neighbors import BallTree
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import Dataset, DataLoader

# ==== 1. Dataset class ====
class ParkingDataset(Dataset):
    """Torch dataset yielding (feature vector, is-free label) pairs for parking bays.

    The input frame must provide the columns ``Status_Timestamp``, ``Location``
    (a "lat,lon" string), ``Zone_Number``, ``Status_Description`` and
    ``KerbsideID``. The frame is copied, so the caller's data is not modified.

    Args:
        df: Raw sensor rows.
        label_encoder: Object whose ``transform`` maps zone strings to integer codes.
        scaler: Object whose ``transform`` scales the five feature columns.

    Attributes set on the instance: ``df`` (enriched copy), ``X`` (scaled
    features), ``y`` (float32 labels, 1.0 when the status is "unoccupied"),
    ``kerbside_id`` and ``latlon``.
    """

    def __init__(self, df, label_encoder, scaler):
        frame = df.copy()

        # Calendar features from the timestamp; unparseable values become NaT.
        stamps = pd.to_datetime(frame["Status_Timestamp"], errors='coerce')
        frame["Status_Timestamp"] = stamps
        frame["hour"] = stamps.dt.hour
        frame["dayofweek"] = stamps.dt.dayofweek

        # "lat,lon" text -> two float columns.
        coords = frame["Location"].str.split(",", expand=True).astype(float)
        frame[["lat", "lon"]] = coords

        # Missing zones share the "unknown" bucket before encoding.
        frame["Zone_Number"] = frame["Zone_Number"].fillna("unknown").astype(str)
        frame["Zone_Number_enc"] = label_encoder.transform(frame["Zone_Number"])

        # Target: 1 when the bay is reported as unoccupied (case-insensitive), else 0.
        status = frame["Status_Description"].str.lower()
        frame["is_free"] = status.eq("unoccupied").astype(int)

        feature_cols = ["hour", "dayofweek", "Zone_Number_enc", "lat", "lon"]
        self.df = frame
        self.X = scaler.transform(frame[feature_cols])
        self.y = frame["is_free"].values.astype(np.float32)

        # Kept alongside the tensors so predictions can be mapped back to bays.
        self.kerbside_id = frame["KerbsideID"].values
        self.latlon = frame[["lat", "lon"]].values

    def __len__(self):
        """Number of rows in the dataset."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return the float32 feature tensor and float32 label tensor for row ``idx``."""
        sample = torch.tensor(self.X[idx], dtype=torch.float32)
        label = torch.tensor(self.y[idx], dtype=torch.float32)
        return sample, label

# ==== 2. Model ====
class ParkingModel(nn.Module):
    """Feed-forward binary classifier that outputs a probability in (0, 1).

    The backbone is three Linear -> BatchNorm1d -> ReLU -> Dropout stages
    (widths 128, 256, 128), followed by a single-unit linear head and a sigmoid.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # (in_features, out_features, dropout probability) per hidden stage.
        stage_specs = (
            (input_dim, 128, 0.3),
            (128, 256, 0.3),
            (256, 128, 0.2),
        )
        layers = []
        for in_width, out_width, drop_p in stage_specs:
            layers.extend([
                nn.Linear(in_width, out_width),
                nn.BatchNorm1d(out_width),
                nn.ReLU(),
                nn.Dropout(drop_p),
            ])
        self.backbone = nn.Sequential(*layers)
        self.fc = nn.Linear(128, 1)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, 1) probabilities."""
        hidden = self.backbone(x)
        logits = self.fc(hidden)
        return torch.sigmoid(logits)

# ==== 3. Load and preprocess the data ====
df = pd.read_csv("/Users/sharm/Desktop/FIT5120-OnBoarding/commute_planner/on-street-parking-bay-sensors.csv")

# Timestamps: trim surrounding whitespace, parse as UTC (unparseable values
# become NaT), then drop the timezone so the column holds naive datetimes.
df["Status_Timestamp"] = pd.to_datetime(
    df["Status_Timestamp"].astype(str).str.strip(),
    errors="coerce",
    utc=True,
).dt.tz_convert(None)

# Calendar features used by the model.
df = df.assign(
    hour=df["Status_Timestamp"].dt.hour,
    dayofweek=df["Status_Timestamp"].dt.dayofweek,
)

# "lat,lon" text -> two float columns.
df[["lat", "lon"]] = df["Location"].str.split(",", expand=True).astype(float)

# Missing zones are grouped under the literal "unknown".
df["Zone_Number"] = df["Zone_Number"].fillna("unknown").astype(str)

def clean_zone_number(z):
    """Normalise a zone identifier so numeric variants share one spelling.

    Values that parse as a whole number are rendered without a decimal part
    ("7539.0" -> "7539"); other numeric values are rendered via ``str(float)``;
    anything that cannot be converted to a float (e.g. "unknown") is returned
    unchanged.

    Args:
        z: Zone identifier, typically a string.

    Returns:
        The normalised string, or ``z`` itself when it is not numeric.
    """
    try:
        f = float(z)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "not numeric"; a bare except
        # would also swallow KeyboardInterrupt / SystemExit.
        return z
    return str(int(f)) if f.is_integer() else str(f)

df["Zone_Number"] = df["Zone_Number"].apply(clean_zone_number)

# The encoder and scaler are fitted on the full frame and reused for every split.
label_encoder = LabelEncoder()
df["Zone_Number_enc"] = label_encoder.fit_transform(df["Zone_Number"])

features = ["hour", "dayofweek", "Zone_Number_enc", "lat", "lon"]
scaler = StandardScaler()
scaler.fit(df[features])

# ==== 4. Split the data for pretraining and fine-tuning ====
# "Recent" means the last two months before the newest timestamp in the data.
recent_cutoff = df["Status_Timestamp"].max() - pd.DateOffset(months=2)
recent_df = df[df["Status_Timestamp"] >= recent_cutoff].copy()
# NOTE(review): pretrain_df is the whole frame, so it also contains the recent
# rows; rows later held out for fine-tune validation may therefore have been
# seen during pretraining. Confirm this overlap is intended.
pretrain_df = df.copy()

# ==== 5. Device and model initialisation ====
# Seed torch so weight initialisation, dropout and DataLoader shuffling are
# repeatable across runs (42 matches the random_state used for the splits).
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ParkingModel(input_dim=len(features)).to(device)
# The model already applies a sigmoid, so plain BCELoss is the matching criterion.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to train; switched to training mode.
        loader: Iterable of (inputs, targets) batches that supports ``len()``.
        criterion: Loss callable taking (outputs, targets).
        optimizer: Optimizer bound to the model's parameters.
        device: Device the batches are moved to.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    model.train()
    total_loss = 0.0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        # Targets arrive as (batch,); add a column axis to match the (batch, 1) outputs.
        targets = targets.to(device).unsqueeze(1)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()
    return total_loss / len(loader)

def eval_one_epoch(model, loader, criterion, device):
    """Compute the mean batch loss over ``loader`` without updating the model.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        loader: Iterable of (inputs, targets) batches that supports ``len()``.
        criterion: Loss callable taking (outputs, targets).
        device: Device the batches are moved to.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for inputs, targets in loader:
            # Targets arrive as (batch,); add a column axis to match the outputs.
            targets = targets.to(device).unsqueeze(1)
            predictions = model(inputs.to(device))
            total_loss += criterion(predictions, targets).item()
    return total_loss / len(loader)

def compute_accuracy(model, loader, device):
    """Return the fraction of samples whose thresholded prediction equals the label.

    Model outputs of 0.5 or more count as class 1, anything lower as class 0.

    Args:
        model: Module producing one probability per sample; switched to eval mode.
        loader: Iterable of (inputs, targets) batches.
        device: Device the batches are moved to.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            # Column-shape the targets so the comparison is element-wise.
            targets = targets.to(device).unsqueeze(1)
            hard_preds = (model(inputs) >= 0.5).float()
            n_correct += hard_preds.eq(targets).sum().item()
            n_seen += targets.size(0)
    return n_correct / n_seen

# ==== 6. Pretraining stage ====
def _build_split_loaders(frame):
    """Split ``frame`` 80/20 and wrap both parts in a ParkingDataset and DataLoader."""
    train_part, val_part = train_test_split(frame, test_size=0.2, random_state=42)
    train_ds = ParkingDataset(train_part, label_encoder, scaler)
    val_ds = ParkingDataset(val_part, label_encoder, scaler)
    # Only the training loader shuffles; validation order stays fixed.
    train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=64, shuffle=False)
    return train_part, val_part, train_ds, val_ds, train_dl, val_dl


(
    train_df_pre,
    val_df_pre,
    train_dataset_pre,
    val_dataset_pre,
    train_loader_pre,
    val_loader_pre,
) = _build_split_loaders(pretrain_df)

#print("=== Pretraining on all historical data ===")
pretrain_epochs = 450
for epoch in range(pretrain_epochs):
    train_loss = train_one_epoch(model, train_loader_pre, criterion, optimizer, device)
    val_loss = eval_one_epoch(model, val_loader_pre, criterion, device)
    #print(f"Pretrain Epoch [{epoch+1}/{pretrain_epochs}] Train Loss: {train_loss:.4f} Val Loss: {val_loss:.4f}")

# ==== 7. Fine-tuning stage ====
# The same model and optimizer continue training, now on the recent rows only.
(
    train_df_fine,
    val_df_fine,
    train_dataset_fine,
    val_dataset_fine,
    train_loader_fine,
    val_loader_fine,
) = _build_split_loaders(recent_df)

#print("=== Fine-tuning on recent 2 months data ===")
finetune_epochs = 100
for epoch in range(finetune_epochs):
    train_loss = train_one_epoch(model, train_loader_fine, criterion, optimizer, device)
    val_loss = eval_one_epoch(model, val_loader_fine, criterion, device)
    val_acc = compute_accuracy(model, val_loader_fine, device)
    print(f"Finetune Epoch [{epoch+1}/{finetune_epochs}]  Accuracy: {val_acc:.4f}")

# ==== 8. Save the model and the preprocessing objects ====
# Written to the working directory; the inference function reloads these files.
torch.save(model.state_dict(), "parking_model_finetuned.pth")
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(scaler, "scaler.pkl")

# ==== 9. Inference function ====
def find_nearby_free_slots(zone_number, current_time, top_k=5, radius_m=500):
    """Recommend bays in a zone that the model predicts to be free.

    Reloads the fine-tuned model, label encoder and scaler from the working
    directory, scores every row of the module-level ``df`` that belongs to
    ``zone_number`` using the hour and weekday of ``current_time``, and returns
    the KerbsideIDs of the predicted-free bays closest to the centroid of all
    predicted-free bays in that zone.

    Args:
        zone_number: Zone identifier; converted with ``str()`` and matched
            against the label encoder's classes.
        current_time: Object exposing ``.hour`` and ``.weekday()`` (e.g. a
            ``datetime``).
        top_k: Maximum number of bays to return.
        radius_m: Maximum great-circle distance in metres between a returned
            bay and the centroid of the predicted-free bays. Pass ``None`` to
            disable the distance filter.

    Returns:
        A list of KerbsideID values, or the sentinel ``[0]`` when the zone is
        unknown, has no rows, has no predicted-free bay, or no predicted-free
        bay lies within ``radius_m``.
    """
    # Haversine distances from BallTree are expressed in radians on the unit
    # sphere; multiplying by the mean Earth radius converts them to metres.
    earth_radius_m = 6_371_000.0

    model = ParkingModel(input_dim=len(features))
    # NOTE: torch.load and joblib.load unpickle their input; only load artifacts
    # that were produced by a trusted run of this notebook.
    model.load_state_dict(torch.load("parking_model_finetuned.pth", map_location="cpu"))
    model.eval()

    label_encoder_loaded = joblib.load("label_encoder.pkl")
    scaler_loaded = joblib.load("scaler.pkl")

    zone_number_str = str(zone_number)
    if zone_number_str not in label_encoder_loaded.classes_:
        print(f"Zone number {zone_number_str} not found in label encoder classes.")
        return [0]

    hour = current_time.hour
    dayofweek = current_time.weekday()
    zone_enc = label_encoder_loaded.transform([zone_number_str])[0]

    zone_df = df[df["Zone_Number"] == zone_number_str].copy()
    if zone_df.empty:
        return [0]

    # Replace the recorded time features with the query time so the model
    # scores each bay "as of" current_time.
    zone_df["hour"] = hour
    zone_df["dayofweek"] = dayofweek
    zone_df[["lat", "lon"]] = zone_df["Location"].str.split(",", expand=True).astype(float)
    zone_df["Zone_Number_enc"] = zone_enc

    X_zone = scaler_loaded.transform(zone_df[features])
    with torch.no_grad():
        probs = model(torch.tensor(X_zone, dtype=torch.float32)).numpy().flatten()

    free_slots = zone_df[probs > 0.5][["KerbsideID", "lat", "lon"]]
    if free_slots.empty:
        return [0]

    tree = BallTree(np.radians(free_slots[["lat", "lon"]].values), metric='haversine')
    query_point = np.radians([[free_slots["lat"].mean(), free_slots["lon"].mean()]])
    dist, ind = tree.query(query_point, k=min(top_k, len(free_slots)))

    neighbour_idx = ind[0]
    if radius_m is not None:
        # Previously radius_m was accepted but ignored; enforce it here.
        within_radius = dist[0] * earth_radius_m <= radius_m
        neighbour_idx = neighbour_idx[within_radius]

    nearby_ids = free_slots.iloc[neighbour_idx]["KerbsideID"].tolist()
    return nearby_ids if nearby_ids else [0]

# ==== 10. Smoke-test the inference function ====
# Query zone 7539 as of the moment this cell runs.
query_time = datetime.now()
test_result = find_nearby_free_slots(7539, query_time)
print("Recommended vacant parking spaces：", test_result)

Finetune Epoch [1/100]  Accuracy: 0.7697
Finetune Epoch [2/100]  Accuracy: 0.7610
Finetune Epoch [3/100]  Accuracy: 0.7610
Finetune Epoch [4/100]  Accuracy: 0.7610
Finetune Epoch [5/100]  Accuracy: 0.7544
Finetune Epoch [6/100]  Accuracy: 0.7544
Finetune Epoch [7/100]  Accuracy: 0.7456
Finetune Epoch [8/100]  Accuracy: 0.7522
Finetune Epoch [9/100]  Accuracy: 0.7566
Finetune Epoch [10/100]  Accuracy: 0.7741
Finetune Epoch [11/100]  Accuracy: 0.7544
Finetune Epoch [12/100]  Accuracy: 0.7566
Finetune Epoch [13/100]  Accuracy: 0.7566
Finetune Epoch [14/100]  Accuracy: 0.7566
Finetune Epoch [15/100]  Accuracy: 0.7675
Finetune Epoch [16/100]  Accuracy: 0.7588
Finetune Epoch [17/100]  Accuracy: 0.7522
Finetune Epoch [18/100]  Accuracy: 0.7544
Finetune Epoch [19/100]  Accuracy: 0.7544
Finetune Epoch [20/100]  Accuracy: 0.7522
Finetune Epoch [21/100]  Accuracy: 0.7632
Finetune Epoch [22/100]  Accuracy: 0.7566
Finetune Epoch [23/100]  Accuracy: 0.7632
Finetune Epoch [24/100]  Accuracy: 0.7675
F

In [3]:
# Create the target directory first; saving into a missing directory fails.
os.makedirs("./models23", exist_ok=True)
torch.save(model.state_dict(), "./models23/parking_model_finetuned.pth")
joblib.dump(label_encoder, "./models23/label_encoder.pkl")
joblib.dump(scaler, "./models23/scaler.pkl")

['./models23/scaler.pkl']

In [4]:
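# Usage (reloading the saved artifacts):
# NOTE: the cell above writes to ./models23/ while the paths below point at
# ./models/ - use whichever directory actually holds the artifacts.
#import torch
#import joblib

# First create a model instance (the architecture must match the one used for training)
#model = ParkingModel(input_dim=5)  # the model takes 5 input features

# Load the model weights
#model.load_state_dict(torch.load("./models/parking_model_finetuned.pth", map_location="cpu"))
#model.eval()  # switch to evaluation mode

# Load the preprocessing objects
#label_encoder = joblib.load("./models/label_encoder.pkl")
#scaler = joblib.load("./models/scaler.pkl")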