# 网球比赛动量分析与结果预测

## 研究问题
"势头"（momentum，亦称"动量"）是体育比赛中一个关键但主观的概念。本实验基于网球比赛逐分数据，利用LSTM识别比赛中"势头"的变化并预测下一分的胜负结果，同时与Transformer模型的预测性能进行对比。

## 研究任务
1. 构建时间序列（按每一分的得失序列）
2. 使用LSTM预测下一分（即下一时刻）选手得分的概率
3. 对比LSTM和Transformer模型的预测性能

## 1. 环境设置与数据加载

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import DataLoader
import wandb

# Set up fonts for Chinese text in matplotlib figures: list SimHei and
# Microsoft YaHei as the sans-serif candidates, and turn off the Unicode
# minus sign so negative tick labels use the plain ASCII hyphen.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei'],
    'axes.unicode_minus': False,
})

# Select the compute device: CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"使用设备: {device}")

In [None]:
# Load the match CSV through the project's preprocessing entry point and
# report how many rows and distinct matches came back.
from data_preprocessing import prepare_data

file_path = "选题三_Data/2024_Wimbledon_featured_matches.csv"
df_clean, match_results = prepare_data(file_path)

n_points = len(df_clean)
n_matches = df_clean['match_id'].nunique()
print(f"数据加载完成: {n_points} 个数据点, {n_matches} 场比赛")

## 2. 特征工程与时间序列构建

In [None]:
# Derive the momentum features from the cleaned data.
from time_series_data import create_momentum_features

df_momentum = create_momentum_features(df_clean)
print("动量特征创建完成")

# Reported as the "feature count": this is the second dimension of the
# returned object (presumably a DataFrame, i.e. its column count — confirm).
n_feature_columns = df_momentum.shape[1]
print(f"特征数: {n_feature_columns}")

In [None]:
# Build the windowed time-series data loaders from the momentum features.
from time_series_data import prepare_time_series_data

sequence_length = 10

# The helper returns a 4-tuple: train / validation / test loaders followed by
# the feature dimension that the models below take as their input size.
loaders_and_dim = prepare_time_series_data(df_momentum, sequence_length=sequence_length)
train_loader, val_loader, test_loader, feature_dim = loaders_and_dim

print("时间序列数据构建完成")
print(f"序列长度: {sequence_length}, 特征维度: {feature_dim}")

## 3. 模型定义与训练

In [None]:
# Start the Weights & Biases run. The training calls in this notebook pass
# use_wandb=True, so this must execute before them.
wandb_run_settings = {
    "project": "tennis-momentum",
    "name": "experiment-1",
}
wandb.init(**wandb_run_settings)

In [None]:
# Build the LSTM model via the project's model factory and report its size.
from models import create_model

lstm_hyperparams = {
    'hidden_dim': 128,
    'num_layers': 2,
    'dropout': 0.3,
}
lstm_model = create_model('lstm', feature_dim, **lstm_hyperparams)

# Total number of scalar entries across all parameters of the model.
lstm_param_count = sum(p.numel() for p in lstm_model.parameters())
print(f"LSTM模型创建完成，参数量: {lstm_param_count}")

In [None]:
# Train the LSTM model and keep the returned training history for plotting.
from train import train_model

lstm_train_settings = {
    'num_epochs': 50,
    'learning_rate': 0.001,
    'device': device,
    'use_wandb': True,
    'model_name': "LSTM",
}
lstm_history = train_model(lstm_model, train_loader, val_loader, **lstm_train_settings)

In [None]:
# Build and train the Transformer model with the same training schedule as
# the LSTM. Relies on create_model and train_model imported in earlier cells.
transformer_hyperparams = {
    'd_model': 128,
    'nhead': 8,
    'num_layers': 2,
    'dropout': 0.3,
}
transformer_model = create_model('transformer', feature_dim, **transformer_hyperparams)

transformer_train_settings = {
    'num_epochs': 50,
    'learning_rate': 0.001,
    'device': device,
    'use_wandb': True,
    'model_name': "Transformer",
}
transformer_history = train_model(
    transformer_model, train_loader, val_loader, **transformer_train_settings
)

## 4. 模型评估

In [None]:
# Evaluate every trained model on the test loader using its saved checkpoint,
# and collect the metrics per model name in `results`.
from train import evaluate

models = {
    'LSTM': lstm_model,
    'Transformer': transformer_model
}

# The loss module does not depend on the model, so build it once and reuse it.
criterion = nn.CrossEntropyLoss()

results = {}
for name, model in models.items():
    # Checkpoint path is keyed by model name (presumably written by
    # train_model under its model_name argument — confirm against train.py).
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on a GPU machine still loads on a CPU-only one.
    state_dict = torch.load(f'checkpoints/best_{name}.pth', map_location=device)
    model.load_state_dict(state_dict)

    metrics = evaluate(model, test_loader, criterion, device)
    results[name] = metrics

    print(f"\n{name} 测试结果:")
    print(f"  准确率: {metrics['accuracy']:.4f}")
    print(f"  F1分数: {metrics['f1']:.4f}")
    print(f"  AUC: {metrics['auc']:.4f}")
    print(f"  MAE: {metrics['mae']:.4f}")
    print(f"  RMSE: {metrics['rmse']:.4f}")

## 5. 可视化分析

In [None]:
# Plot the training curves of each model from its training history; one PNG
# path per model is passed to the plotting helper.
from visualization_deep import plot_training_curves

training_histories = {
    'LSTM': lstm_history,
    'Transformer': transformer_history,
}
for model_label, history in training_histories.items():
    curve_path = f'visualizations/training_curves_{model_label}.png'
    plot_training_curves(history, model_label, curve_path)

In [None]:
# Plot the model comparison from the collected test metrics.
from visualization_deep import plot_model_comparison

comparison_path = 'visualizations/model_comparison.png'
plot_model_comparison(results, comparison_path)

In [None]:
# Plot the prediction trend for the model with the highest test F1 score.
from visualization_deep import plot_prediction_trend

# Iterating the dict yields its keys; on a tie the first model listed wins.
best_model_name = max(results, key=lambda label: results[label]['f1'])
best_model = models[best_model_name]
plot_prediction_trend(
    best_model,
    test_loader,
    device,
    save_path='visualizations/prediction_trend.png',
)

## 6. 结论

通过对比LSTM和Transformer两种模型，我们发现：
- [根据实际结果填写]
- [根据实际结果填写]