<a href="https://colab.research.google.com/github/yohoobot/works/blob/main/spt3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# -*- coding: utf-8 -*-
"""
Spotify餐馆场景音乐爬虫（使用 Client Credentials Flow 获取 token）：
根据关键词搜索含描述的公开歌单，并提取歌单与曲目信息。
"""

import requests
import pandas as pd
import time
import base64

# ========== 1. Spotify developer credentials ==========
# Read the credentials from the environment so that secrets are not stored in
# the notebook itself. Both values fall back to an empty string when the
# corresponding variable is unset.
import os

CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET", "")


# ========== 2. 获取 Access Token ==========
def get_access_token(client_id, client_secret, timeout=10):
    """Request an access token using the Client Credentials flow.

    Args:
        client_id: Client ID sent as the user part of the Basic credentials.
        client_secret: Client secret sent as the password part.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The value of the "access_token" field of the JSON response, or None
        when the request fails, the status is not 200, or the body is not
        valid JSON. Failures are reported on stdout rather than raised.
    """
    credentials = f"{client_id}:{client_secret}".encode()
    b64_auth = base64.b64encode(credentials).decode()

    headers = {
        "Authorization": f"Basic {b64_auth}",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {"grant_type": "client_credentials"}

    try:
        # Without a timeout, requests may block forever on a stalled connection.
        resp = requests.post(
            "https://accounts.spotify.com/api/token",
            headers=headers,
            data=data,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print("❌ 无法获取 access_token")
        print(exc)
        return None

    if resp.status_code != 200:
        print("❌ 无法获取 access_token")
        print(resp.text)
        return None

    try:
        return resp.json().get("access_token")
    except ValueError:
        # A 200 response whose body is not JSON is treated like any other failure.
        print("❌ 无法获取 access_token")
        print(resp.text)
        return None

# Fetch the token once; abort immediately when none could be obtained.
ACCESS_TOKEN = get_access_token(CLIENT_ID, CLIENT_SECRET)
if ACCESS_TOKEN:
    # Bearer header reused by the API requests below.
    HEADERS = {"Authorization": "Bearer {}".format(ACCESS_TOKEN)}
else:
    raise RuntimeError("无法获取 access_token，终止程序。")

# ========== 3. Search keywords ==========
# Each entry is used as one playlist search query.
KEYWORDS = [
    # Generic venues
    "restaurant",
    "cafe",
    "coffee shop",
    "dining",
    "brunch spot",
    "fine dining",
    "bistro",
    "bar",
    # Japanese
    "japanese restaurant",
    "sushi bar",
    "izakaya",
    # Chinese
    "chinese restaurant",
    "hotpot",
    "dim sum",
    # Other cuisines
    "korean bbq",
    "french restaurant",
    "italian restaurant",
    "mexican restaurant",
    "indian restaurant",
    "vegan cafe",
]

# ========== 4. Result rows ==========
# One dict per (playlist, track) pair; each becomes one CSV row.
data_pairs = []

_API_BASE = "https://api.spotify.com/v1"
_REQUEST_TIMEOUT = 10  # seconds to wait for each API response
_CSV_COLUMNS = [
    "keyword",
    "playlist_name",
    "playlist_description",
    "playlist_url",
    "track_name",
    "artist",
    "track_url",
]


def _get_json(url, params, failure_label):
    """GET *url* with *params* and return the decoded JSON object.

    Returns None (after printing *failure_label* and the error details) when
    the request raises, the status is not 200, or the body is not a JSON
    object, so the caller can skip the item instead of crashing.
    """
    try:
        # `params` lets requests percent-encode the values; the timeout keeps
        # a stalled connection from blocking the whole run.
        resp = requests.get(
            url, headers=HEADERS, params=params, timeout=_REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        print(f"{failure_label}{url}")
        print(exc)
        return None

    if resp.status_code != 200:
        print(f"{failure_label}{resp.url}")
        print(resp.text)
        return None

    try:
        payload = resp.json()
    except ValueError:
        print(f"{failure_label}{resp.url}")
        print(resp.text)
        return None
    return payload if isinstance(payload, dict) else None


def _spotify_link(obj):
    """Return obj["external_urls"]["spotify"], or "" if missing or null."""
    return (obj.get("external_urls") or {}).get("spotify") or ""


def _track_fields(item):
    """Return (track_name, artist_name, track_url) for one playlist item.

    Returns None when the item has no usable track object or no track name.
    """
    track_info = item.get("track") if isinstance(item, dict) else None
    if not track_info or not isinstance(track_info, dict):
        return None

    track_name = track_info.get("name") or ""
    if not track_name:
        return None

    artists = track_info.get("artists") or []
    # Only the first listed artist is recorded.
    first_artist = artists[0] if artists and isinstance(artists[0], dict) else {}
    return track_name, first_artist.get("name") or "", _spotify_link(track_info)


# ========== 5. Search playlists for every keyword ==========
for keyword in KEYWORDS:
    print(f"\n🔍 正在搜索关键词：{keyword}")
    search_json = _get_json(
        f"{_API_BASE}/search",
        {"q": keyword, "type": "playlist", "limit": 20},
        "⚠️ 搜索失败：",
    )
    if search_json is None:
        continue

    # `or` guards against keys that are present but null in the response.
    playlists = (search_json.get("playlists") or {}).get("items") or []
    print(f"📊 找到歌单数：{len(playlists)}")

    for pl in playlists:
        if not pl or not isinstance(pl, dict):
            continue

        playlist_id = pl.get("id")
        playlist_name = pl.get("name") or ""
        playlist_desc = pl.get("description") or ""
        playlist_url = _spotify_link(pl)

        # Playlists without a description are skipped.
        if not playlist_id or not playlist_desc.strip():
            print(f"⚠️ 跳过无效或无描述歌单：{playlist_name}")
            continue

        print(f"🎵 抓取歌单：{playlist_name}")
        tracks_json = _get_json(
            f"{_API_BASE}/playlists/{playlist_id}/tracks",
            {"limit": 3},
            "⚠️ 歌单曲目抓取失败：",
        )
        # Pause after every track request, including failed ones, so that
        # error paths do not fire the next request immediately.
        time.sleep(1)
        if tracks_json is None:
            continue

        for item in tracks_json.get("items") or []:
            fields = _track_fields(item)
            if fields is None:
                continue
            track_name, artist_name, track_link = fields

            data_pairs.append({
                "keyword": keyword,
                "playlist_name": playlist_name,
                "playlist_description": playlist_desc,
                "playlist_url": playlist_url,
                "track_name": track_name,
                "artist": artist_name,
                "track_url": track_link,
            })

# ========== 6. Save as CSV ==========
# Explicit columns keep the header row present even when nothing was collected.
df = pd.DataFrame(data_pairs, columns=_CSV_COLUMNS)
# utf-8-sig writes a BOM at the start of the file.
df.to_csv("spotify_scene_playlists.csv", index=False, encoding="utf-8-sig")
print("\n✅ 数据抓取完成，保存为 spotify_scene_playlists.csv")



🔍 正在搜索关键词：restaurant
📊 找到歌单数：20
🎵 抓取歌单：Restaurant Music 2025
⚠️ 跳过无效或无描述歌单：restaurant lounge vibes but in a cool, chill, funky way
🎵 抓取歌单：RESTAURANT MUSIC 2025 🍽  chill covers & dinner songs
⚠️ 跳过无效或无描述歌单：Upbeat clean restaurant 2.0
🎵 抓取歌单：fancy restaurant music
🎵 抓取歌单：Restaurant Lounge Music 2025 🍸 Background Music 🍸 Dinner Chill
⚠️ 跳过无效或无描述歌单：Clean Restaurant Music 2023 / 2024 (2)
⚠️ 跳过无效或无描述歌单：upbeat restaurant music 
🎵 抓取歌单：Chill Restaurant & Bar Vibes 
🎵 抓取歌单：Chill restaurant summer🌞
🎵 抓取歌单：Dinner Jazz in the Background
⚠️ 跳过无效或无描述歌单：90's and 2000's for restaurant
🎵 抓取歌单：DINNER JAZZ 🍷 Instrumental Jazz & Bossa Nova (Restaurant Music)
⚠️ 跳过无效或无描述歌单：Mexican Restaurant Music
⚠️ 跳过无效或无描述歌单：my clean restaraunt playlist
🎵 抓取歌单：Restaurant Background Music 🍸 chill covers & saxophone lounge
⚠️ 跳过无效或无描述歌单：clean pop, indie, and r&b restaurant hits

🔍 正在搜索关键词：cafe
📊 找到歌单数：20
🎵 抓取歌单：Cozy Coffee Shop ☕ Jazz in the Background
🎵 抓取歌单：lofi Jazz Cafe 🎷Study Beats & Chill Vibes
🎵 抓取歌单：Café Music 20