<a href="https://colab.research.google.com/github/yohoobot/works/blob/main/getdata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# -*- coding: utf-8 -*-
"""
Spotify餐馆场景音乐爬虫（Client Credentials Flow）：
- 每关键词获取10个有描述的歌单
- 每歌单提取3首歌曲
- 歌手必须有 genre，否则跳过
"""

import base64
import os
import time

import pandas as pd
import requests

# ========== 1. Spotify credentials ==========
# Read from the environment so secrets do not have to be written into the
# file; when the variables are unset the values fall back to the original
# empty-string placeholders.
CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET", "")

# ========== 2. 获取 access_token ==========
def get_access_token(client_id, client_secret, timeout=10):
    """Request an access token using the Client Credentials flow.

    Args:
        client_id: Spotify application client ID.
        client_secret: Spotify application client secret.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The access token string, or None when either credential is blank,
        the endpoint answers with a status other than 200, or the response
        body carries no "access_token" field.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout).
    """
    # Blank credentials can never be accepted; skip the network round trip.
    if not client_id or not client_secret:
        return None

    credentials = f"{client_id}:{client_secret}"
    b64_auth = base64.b64encode(credentials.encode()).decode()
    headers = {
        "Authorization": f"Basic {b64_auth}",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # An explicit timeout keeps a stalled connection from hanging forever.
    resp = requests.post(
        "https://accounts.spotify.com/api/token",
        headers=headers,
        data={"grant_type": "client_credentials"},
        timeout=timeout,
    )
    if resp.status_code != 200:
        return None
    return resp.json().get("access_token")

# Obtain the bearer token once at start-up; abort when none was returned.
ACCESS_TOKEN = get_access_token(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
if not ACCESS_TOKEN:
    raise RuntimeError("❌ 获取 access_token 失败")

# Authorization header reused by the API requests made later in the script.
HEADERS = dict(Authorization="Bearer {}".format(ACCESS_TOKEN))

# ========== 3. Search keywords ==========
KEYWORDS = ["buffet restaurant"]

# ========== 4. Result accumulator ==========
# Starts empty; rows are collected into this list.
data_pairs = list()

# ========== 5. Crawl: keyword -> playlists -> tracks -> artist genres ==========
SEARCH_URL = "https://api.spotify.com/v1/search"
REQUEST_TIMEOUT = 10      # seconds; without it a stalled connection hangs the run
MAX_PLAYLISTS = 10        # playlists with a description kept per keyword
TRACKS_PER_PLAYLIST = 3   # tracks requested from each playlist

# artist_id -> list of genres. Playlists frequently repeat the same artist,
# so each artist is fetched at most once per run. Only successful lookups
# are stored, so a failed request is retried the next time the artist appears.
artist_genre_cache = {}


def _spotify_get(url, params=None):
    """GET a Spotify endpoint using the shared auth headers.

    Args:
        url: Endpoint URL.
        params: Optional query parameters; requests URL-encodes them.

    Returns:
        The response object, or None when the request itself failed
        (connection error, timeout), so that one bad call does not abort
        the crawl and discard the rows collected so far.
    """
    try:
        return requests.get(
            url, headers=HEADERS, params=params, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        print("⚠️ 请求异常：", exc)
        return None


for keyword in KEYWORDS:
    print(f"\n🔍 搜索关键词：{keyword}")
    # Passing the query through params URL-encodes spaces and reserved
    # characters in the keyword instead of pasting them into the URL raw.
    resp = _spotify_get(
        SEARCH_URL, params={"q": keyword, "type": "playlist", "limit": 50}
    )
    if resp is None:
        continue
    if resp.status_code != 200:
        print("⚠️ 搜索失败：", resp.text)
        continue

    all_playlists = (resp.json().get("playlists") or {}).get("items") or []
    print(f"🔎 返回歌单总数：{len(all_playlists)}")

    # Keep the first MAX_PLAYLISTS playlists that have a non-blank description.
    valid_playlists = []
    for pl in all_playlists:
        if not pl or not isinstance(pl, dict):
            continue
        # The key can be present with a null value, in which case .get()'s
        # default is not used; `or ""` covers that case as well.
        desc = pl.get("description") or ""
        if desc.strip():
            valid_playlists.append(pl)
        if len(valid_playlists) >= MAX_PLAYLISTS:
            break

    print(f"✅ 有描述的歌单数：{len(valid_playlists)}")

    for pl in valid_playlists:
        playlist_id = pl.get("id")
        if not playlist_id:
            # Without an id the tracks URL cannot be built.
            continue
        playlist_name = pl.get("name", "")
        playlist_desc = pl.get("description") or ""
        playlist_url = (pl.get("external_urls") or {}).get("spotify", "")

        print(f"🎵 抓取歌单：{playlist_name}")
        track_resp = _spotify_get(
            f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks",
            params={"limit": TRACKS_PER_PLAYLIST},
        )
        if track_resp is None or track_resp.status_code != 200:
            print("⚠️ 歌单曲目抓取失败")
            continue

        for item in track_resp.json().get("items") or []:
            track_info = item.get("track") if isinstance(item, dict) else None
            if not track_info or not isinstance(track_info, dict):
                continue  # skip entries without a usable track object

            track_name = track_info.get("name", "")

            artists = track_info.get("artists") or []
            if not artists:
                continue

            # Only the first listed artist is used for the genre lookup.
            artist = artists[0]
            artist_name = artist.get("name", "")
            artist_id = artist.get("id")
            if not artist_id:
                continue

            genre_list = artist_genre_cache.get(artist_id)
            if genre_list is None:
                artist_resp = _spotify_get(
                    f"https://api.spotify.com/v1/artists/{artist_id}"
                )
                if artist_resp is None or artist_resp.status_code != 200:
                    continue
                genre_list = artist_resp.json().get("genres") or []
                artist_genre_cache[artist_id] = genre_list

            # Tracks whose artist has no genre are dropped.
            if not genre_list:
                print(f"⚠️ 跳过无 genre 的歌曲：{track_name} - {artist_name}")
                continue

            data_pairs.append({
                "keyword": keyword,
                "playlist_name": playlist_name,
                "playlist_description": playlist_desc,
                "playlist_url": playlist_url,
                "track_name": track_name,
                "artist": artist_name,
                "artist_genres": ", ".join(genre_list),
                "track_url": (track_info.get("external_urls") or {}).get("spotify", ""),
            })

        # Pause between playlists to space out the requests.
        time.sleep(1)

# ========== 6. Save CSV ==========
# Pin the column set so that a run which collected no rows still writes a
# header line instead of a file with no columns. The names and their order
# match the keys of the dicts appended to data_pairs.
CSV_COLUMNS = [
    "keyword",
    "playlist_name",
    "playlist_description",
    "playlist_url",
    "track_name",
    "artist",
    "artist_genres",
    "track_url",
]
df = pd.DataFrame(data_pairs, columns=CSV_COLUMNS)
# "utf-8-sig" prepends a byte-order mark to the file.
df.to_csv("spotify_scene_playlists.csv", index=False, encoding="utf-8-sig")
print("\n✅ 完成！已保存为 spotify_scene_playlists.csv")



🔍 搜索关键词：buffet restaurant
🔎 返回歌单总数：50
✅ 有描述的歌单数：10
🎵 抓取歌单：Brasserie & Bistro - Musique de fond
🎵 抓取歌单：SH@Villita Pop (español)
🎵 抓取歌单：what it sounded like in Tanzania 
🎵 抓取歌单：fancy restaurant music
⚠️ 跳过无 genre 的歌曲：Je te laisserai des mots - Patrick Watson
🎵 抓取歌单：Jimmy Buffett Beach Mix
⚠️ 跳过无 genre 的歌曲：Trying To Reason With Hurricane Season - Jimmy Buffett
⚠️ 跳过无 genre 的歌曲：Nautical Wheelers - Jimmy Buffett
⚠️ 跳过无 genre 的歌曲：Tin Cup Chalice - Jimmy Buffett
🎵 抓取歌单：Songs You Know by Heart
⚠️ 跳过无 genre 的歌曲：Cheeseburger In Paradise - Jimmy Buffett
⚠️ 跳过无 genre 的歌曲：He Went To Paris - Jimmy Buffett
⚠️ 跳过无 genre 的歌曲：Fins - Jimmy Buffett
🎵 抓取歌单：Chill Restaurant & Bar Vibes 
⚠️ 跳过无 genre 的歌曲：Break My Stride - Matthew Wilder
🎵 抓取歌单：Chill restaurant summer🌞
⚠️ 跳过无 genre 的歌曲：Sober - Childish Gambino
🎵 抓取歌单：Clean Restaurant Pop 2025 Weekend Night Family Friendly Playlist Mothers Day Fathers Day
⚠️ 跳过无 genre 的歌曲：Ordinary - Alex Warren
⚠️ 跳过无 genre 的歌曲：The Giver - Chappell Roan
🎵 抓取歌单：Jimmy Buffett B