<a href="https://colab.research.google.com/github/yohoobot/works/blob/main/spt2_abandon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!lsof -i :8888


In [None]:
!kill -9 5937

In [1]:
!lsof -i :8888

In [5]:
# -*- coding: utf-8 -*-
"""
Spotify餐馆场景音乐爬虫：构建"场景描述" - "背景音乐描述"训练样本
新方案：抓取歌曲的预览音频（preview_url），使用本地音乐分析工具提取音乐特征。
兼容 Colab/Jupyter：手动粘贴 code，无需本地监听端口。
"""

import os
import time
from urllib.parse import parse_qs, urlencode, urlsplit

import pandas as pd
import requests

# ========== 1. Set your Spotify API credentials ==========
# Empty placeholders; whatever is set here is sent in the authorize link and
# in the token request further down.
CLIENT_ID = ""
CLIENT_SECRET = ""
# Used for both the authorize link and the token request. Per the module
# docstring nothing has to listen on this port: the code is copied by hand.
REDIRECT_URI = "http://localhost:8888/callback"
SCOPE = "user-library-read playlist-read-private"

# ========== 2. User authorization: print the link, then read the code ==========
params = urlencode({
    "client_id": CLIENT_ID,
    "response_type": "code",
    "redirect_uri": REDIRECT_URI,
    "scope": SCOPE
})
auth_url = f"https://accounts.spotify.com/authorize?{params}"
print("🔑 请复制以下链接到浏览器中打开进行授权：")
print(auth_url)
# Strip the pasted value: a stray space or newline would otherwise be sent as
# part of the authorization code.
code = input("📥 授权完成后，请将浏览器地址栏中的 code 参数粘贴到此处：\n").strip()
if "://" in code:
    # The whole redirect URL was pasted instead of just the parameter value;
    # pull the `code` query parameter out of it.
    pasted_values = parse_qs(urlsplit(code).query).get("code", [])
    code = pasted_values[0] if pasted_values else ""
if not code:
    # Fail here rather than sending an empty code to the token endpoint.
    raise ValueError("未获取到授权 code，请重新运行并粘贴 code 参数或完整的回调链接。")

# ========== 3. Exchange the code for an access token ==========
TOKEN_URL = "https://accounts.spotify.com/api/token"
token_data = {
    "grant_type": "authorization_code",
    "code": code,
    "redirect_uri": REDIRECT_URI,
    "client_id": CLIENT_ID,
    "client_secret": CLIENT_SECRET
}
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
token_response = requests.post(TOKEN_URL, data=token_data, timeout=30)
try:
    token_json = token_response.json()
except ValueError:
    # The body was not JSON; treat it as "no token" and report it below.
    token_json = {}
if not isinstance(token_json, dict):
    token_json = {}
ACCESS_TOKEN = token_json.get("access_token")
if not ACCESS_TOKEN:
    # Without this check every later request would be sent with
    # "Bearer None" and fail one by one with no hint at the real cause.
    token_error = (
        token_json.get("error_description")
        or token_json.get("error")
        or token_response.text[:200]
    )
    raise RuntimeError(
        f"获取 access token 失败（HTTP {token_response.status_code}）：{token_error}"
    )
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

# ========== 4. Build the keyword list ==========
# Search terms, one playlist search per entry; order is the crawl order.
KEYWORDS = [
    # generic venues
    "restaurant",
    "cafe",
    "coffee shop",
    "dining",
    "brunch spot",
    "fine dining",
    "bistro",
    "bar",
    # Japanese
    "japanese restaurant",
    "sushi bar",
    "izakaya",
    # Chinese
    "chinese restaurant",
    "hotpot",
    "dim sum",
    # other cuisines
    "korean bbq",
    "french restaurant",
    "italian restaurant",
    "mexican restaurant",
    "indian restaurant",
    "vegan cafe",
]

# ========== 5. Initialise the dataset ==========
# Collected rows; the crawl loop appends dicts with "scene" and
# "music_preview" keys.
data_pairs = list()
# Target folder for the downloaded preview clips; reuse it if it exists.
os.makedirs("preview_audio", exist_ok=True)

# ========== 6. Walk the keywords and search playlists ==========
search_url = "https://api.spotify.com/v1/search"
REQUEST_TIMEOUT = 30  # seconds; applied to every HTTP call in this section


def _get_json(url, params=None):
    """Send an authorized GET to *url* and return the decoded JSON object.

    Returns None when the request fails, the status is not 200, or the body
    is not a JSON object, so callers can skip the item instead of crashing.
    """
    try:
        response = requests.get(
            url, headers=HEADERS, params=params, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        payload = response.json()
    except ValueError:
        return None
    return payload if isinstance(payload, dict) else None


def _save_preview(preview_url, filename):
    """Download *preview_url* into *filename*.

    Raises requests.RequestException on a network or HTTP error and OSError
    when the file cannot be written.
    """
    audio_resp = requests.get(preview_url, timeout=REQUEST_TIMEOUT)
    # Without this an error page would be written to disk as an ".mp3".
    audio_resp.raise_for_status()
    with open(filename, "wb") as f:
        f.write(audio_resp.content)


for keyword in KEYWORDS:
    print(f"🔍 正在搜索关键词：{keyword}")
    params = {"q": keyword, "type": "playlist", "limit": 3, "market": "US"}
    search_json = _get_json(search_url, params=params)
    if search_json is None:
        print(f"⚠️ 搜索请求失败：{keyword}")
        continue

    # "playlists" may be present but null, hence `or {}` rather than a default.
    playlist_items = (search_json.get("playlists") or {}).get("items")
    if not playlist_items:
        print("❌ 没有找到相关歌单。")
        continue

    for pl in playlist_items:
        # Skip entries that are not playlist objects (e.g. null placeholders).
        if not isinstance(pl, dict):
            continue

        # `or ""` also covers keys that exist with a null value, which would
        # otherwise end up as the text "None" in the scene description.
        playlist_name = pl.get("name") or ""
        playlist_desc = pl.get("description") or ""
        playlist_id = pl.get("id")

        if not playlist_id:
            continue

        track_url = f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks?limit=10"
        print(f"🎵 正在抓取歌单：{playlist_name}")
        track_data = _get_json(track_url)
        if track_data is None:
            print(f"⚠️ 歌单获取失败：{track_url}")
            continue

        tracks = track_data.get("items") or []
        if not tracks:
            print("⚠️ 歌单中没有歌曲条目。")
            continue

        # Only the first five entries of each playlist are considered.
        for track_obj in tracks[:5]:
            if not isinstance(track_obj, dict):
                continue
            track = track_obj.get("track")
            if not isinstance(track, dict):
                continue
            track_id = track.get("id")
            track_name = track.get("name") or ""
            preview_url = track.get("preview_url")
            artist_list = track.get("artists")
            first_artist = (
                artist_list[0] if isinstance(artist_list, list) and artist_list else None
            )
            artist_name = (
                first_artist.get("name") if isinstance(first_artist, dict) else None
            ) or "unknown"

            if not preview_url:
                print(f"⚠️ 无预览音频：{track_name}")
                continue
            if not track_id:
                # The id is the file name; without it every such track would
                # overwrite the same "None.mp3".
                print(f"⚠️ 缺少曲目 ID，已跳过：{track_name}")
                continue

            # Download the preview clip.
            filename = f"preview_audio/{track_id}.mp3"
            try:
                _save_preview(preview_url, filename)
            except (requests.RequestException, OSError) as exc:
                print(f"⚠️ 下载失败：{preview_url}（{exc}）")
                continue
            print(f"🎧 预览音频已保存：{filename}")

            scene_desc = f"{playlist_name}. {playlist_desc}"
            music_info = f"{track_name} by {artist_name}, preview saved at {filename}"
            data_pairs.append({"scene": scene_desc.strip(), "music_preview": music_info.strip()})
            time.sleep(1)  # pause one second between downloads to avoid hammering the server

# ========== 7. Save as CSV ==========
# Naming the columns explicitly keeps the "scene,music_preview" header row in
# the file even when nothing was collected; for non-empty data the column
# order is the same as the key order of the appended dicts.
df = pd.DataFrame(data_pairs, columns=["scene", "music_preview"])
# utf-8-sig writes a BOM at the start of the file.
df.to_csv("scene_music_pairs.csv", index=False, encoding="utf-8-sig")
print("✅ 数据抓取完成，保存为 scene_music_pairs.csv")


🔑 请复制以下链接到浏览器中打开进行授权：
https://accounts.spotify.com/authorize?client_id=&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8888%2Fcallback&scope=user-library-read+playlist-read-private


KeyboardInterrupt: Interrupted by user