In [None]:
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import csv
import os

# Create a single requests.Session that is reused for every page request
session = requests.Session()

# Set the User-Agent header. `user_agent.random` is evaluated once here, so the
# same User-Agent string is sent with every request made using `headers`.
user_agent = UserAgent()
headers = {'User-Agent': user_agent.random}

# Base URL of the scenic-spot listing; fetch_page_data appends "&pageNum=<n>"
# to this string to address an individual page.
base_url = 'https://zwfw.mct.gov.cn/scenicspot?ssName=&province=&ssYear=&type=gb'

# Accumulator for all scraped scenic spots; fetch_page_data appends one
# (name, location, year) tuple per parsed table row.
all_scenic_spots = []

# Pagination: scrape the data of one given listing page
def fetch_page_data(page_num):
    """Download one listing page and collect its scenic-spot rows.

    The page URL is ``base_url`` with ``&pageNum=<page_num>`` appended. On an
    HTTP 200 response the first ``<table>`` in the HTML is parsed: the first
    ``<tr>`` is skipped as the header row, and every following row that has at
    least three ``<td>`` cells contributes a ``(name, location, year)`` tuple
    (the stripped text of its first three cells) to the module-level
    ``all_scenic_spots`` list.

    Args:
        page_num: Page number placed in the ``pageNum`` query parameter.

    Returns:
        None. Results are appended to ``all_scenic_spots``. A non-200 status,
        a missing table, or a ``requests`` error is reported with ``print``
        and leaves the list unchanged for this page.
    """
    url = f"{base_url}&pageNum={page_num}"
    try:
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            print(f'Failed to retrieve content, status code: {response.status_code}')
            return

        listing = BeautifulSoup(response.text, 'html.parser').find('table')
        if not listing:
            print('没有找到包含景区信息的表格。')
            return

        data_rows = listing.find_all('tr')[1:]  # drop the header row
        for row in data_rows:
            cells = [td.text.strip() for td in row.find_all('td')]
            if len(cells) < 3:
                # Not a full data row (fewer than three cells); ignore it.
                continue
            # Keep only name, location and year: the first three columns.
            all_scenic_spots.append(tuple(cells[:3]))
    except requests.exceptions.RequestException as e:
        print(f'请求过程中发生错误：{e}')

# Crawl listing pages 1 through 23 (the upper bound of range is exclusive).
# Each call appends the rows it finds to all_scenic_spots.
for i in range(1, 24):
    fetch_page_data(i)

# Destination folder and file for the CSV export
csv_folder = r'C:\Users\kkiop\Desktop\data'
csv_file = os.path.join(csv_folder, 'scenic_spots.csv')

# Make sure the folder exists. exist_ok=True replaces the separate
# os.path.exists check, which could race with another process creating
# the folder between the check and the makedirs call.
os.makedirs(csv_folder, exist_ok=True)

# Save the data to the CSV file. newline='' is what the csv module expects
# for files it writes; 'utf-8-sig' puts a UTF-8 BOM at the start of the file.
with open(csv_file, 'w', newline='', encoding='utf-8-sig') as file:
    writer = csv.writer(file)
    # Header row: scenic spot name, region, year of rating
    writer.writerow(['景区名称', '所属地区','评定年份'])
    # Data rows: one (name, location, year) tuple per scenic spot
    writer.writerows(all_scenic_spots)

print(f'数据已保存到 {csv_file}')