# 网球四大满贯冠亚军得主分析

In [2]:
# 导入必要的包
import pandas as pd
import requests
from bs4 import BeautifulSoup

# 设定数据源
URL = "https://en.wikipedia.org/wiki/List_of_Grand_Slam_men%27s_singles_finals"

# 发送 GET 请求
response = requests.get(URL)
response.raise_for_status()

# 解析 HTML
soup = BeautifulSoup(response.text, 'lxml')

# 四大满贯名称及表格定位关键字
slams = {
    "Australian Open": "Australian Open finals",
    "French Open": "French Open finals",
    "Wimbledon": "Wimbledon finals",
    "US Open": "US Open finals"
}

# 初始化数据列表
records = []

for slam, anchor in slams.items():
    # 定位到对应标题
    header = soup.find('span', {'id': anchor})
    if not header:
        print(f"未找到 {slam} 标题")
        continue
    # 找到标题对应的表格
    table = header.find_next('table', {'class': 'wikitable'})
    rows = table.find_all('tr')
    for row in rows[1:]:  # 跳过表头
        cols = row.find_all(['td', 'th'])
        if len(cols) < 5:
            continue
        # 解析年份，过滤无关行
        try:
            year = int(cols[0].text.strip()[:4])
        except:
            continue
        if not (1995 <= year <= 2025):
            continue
        winner = cols[1].get_text(strip=True)
        runner_up = cols[3].get_text(strip=True)
        # 冠军、亚军国籍
        try:
            winner_country = cols[2].find('a')['title'].split(' ')[-1]
        except:
            winner_country = cols[2].get_text(strip=True)
        try:
            runner_up_country = cols[4].find('a')['title'].split(' ')[-1]
        except:
            runner_up_country = cols[4].get_text(strip=True)
        # 记录一行
        records.append({
            'year': year,
            'tournament': slam,
            'winner': winner,
            'runner_up': runner_up,
            'winner_country': winner_country,
            'runner_up_country': runner_up_country
        })

# 构建 DataFrame
df = pd.DataFrame(records)
df = df.sort_values(['year', 'tournament'])

# 保存为 CSV 文件
df.to_csv('grand_slam_finals_1995_2025.csv', index=False, encoding='utf-8-sig')

# 展示前几行
print(df.head())


未找到 Australian Open 标题
未找到 French Open 标题
未找到 Wimbledon 标题
未找到 US Open 标题


KeyError: 'year'