<a href="https://colab.research.google.com/github/yohoobot/works/blob/main/use_qwen_MusicGen_Gradio_yes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ 推荐安装指令（精简版）
!pip install gradio transformers torchaudio


Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [1]:
# ✅ 安装 Gradio（仅首次运行）
!pip install gradio
!pip install transformers torchaudio audiocraft accelerate bitsandbytes


Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [4]:
#【✅==部署small】
# ✅ 安装依赖（首次运行）
!pip install gradio transformers torchaudio

import json
import random
import torch
import requests
import torchaudio
import gradio as gr
from transformers import MusicgenProcessor, MusicgenForConditionalGeneration

# Qwen (Alibaba Cloud DashScope) API configuration.
# The key is read from the DASHSCOPE_API_KEY environment variable so that a real
# secret never has to be saved in the notebook; the original "sk-" placeholder is
# kept as the fallback when the variable is not set.
import os

QWEN_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "sk-")
QWEN_API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"

# Load the few-shot pool: a JSONL file with one {"scene": "...", "music": "..."} object per line.
scene_music_data = []
with open("musicgen_scene_music_pairs.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        # A blank line is not valid JSON; skip it instead of letting json.loads("") raise.
        if line:
            scene_music_data.append(json.loads(line))

# Load the MusicGen processor and model named by MODEL_NAME, then place the model
# on the GPU when CUDA is available and on the CPU otherwise.
MODEL_NAME = "facebook/musicgen-small"
processor = MusicgenProcessor.from_pretrained(MODEL_NAME)
model = MusicgenForConditionalGeneration.from_pretrained(MODEL_NAME).to(
    "cuda" if torch.cuda.is_available() else "cpu"
)

# Step 1: build the few-shot examples in multi-turn chat format.
def build_few_shot_messages(k=2):
    """Return a chat-message list: one system prompt followed by up to ``k`` example turns.

    Each example is drawn at random from the module-level ``scene_music_data`` and
    contributes a ``user`` message ("Scene: <scene>") and an ``assistant`` message
    holding the example's ``music`` text.

    Args:
        k: Number of examples requested. It is clamped to the size of the pool, so a
            small or empty dataset yields fewer examples instead of making
            ``random.sample`` raise ``ValueError``.

    Returns:
        list[dict]: Messages with ``role`` and ``content`` keys, system message first.
    """
    k = max(0, min(k, len(scene_music_data)))
    messages = [{"role": "system", "content": "You are a music cognition expert converting restaurant scene descriptions into music prompts suitable for MusicGen."}]
    for ex in random.sample(scene_music_data, k):
        messages.append({"role": "user", "content": f"Scene: {ex['scene']}"})
        messages.append({"role": "assistant", "content": ex['music']})
    return messages

# Step 2: append the current user scene and send the conversation to Qwen.
def generate_music_description(scene_desc):
    """Ask Qwen to turn a scene description into a music prompt.

    The few-shot messages from ``build_few_shot_messages`` are extended with the
    user's scene and posted to ``QWEN_API_URL``.

    Args:
        scene_desc: Free-text scene description supplied by the user.

    Returns:
        str: The generated text on success; ``"No output"`` when the response
        carries no text; otherwise a string starting with ``"Error: "`` that
        describes the failure (network problem, non-200 status, or non-JSON body).
    """
    messages = build_few_shot_messages(k=2)
    messages.append({"role": "user", "content": f"Scene: {scene_desc}"})

    headers = {
        "Authorization": f"Bearer {QWEN_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "qwen2.5-14b-instruct",
        "input": {"messages": messages},
        "parameters": {"temperature": 0.5, "max_tokens": 200}
    }

    try:
        # Without a timeout a stalled connection would block the UI callback forever.
        response = requests.post(QWEN_API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        return f"Error: {exc}"

    if response.status_code != 200:
        return f"Error: {response.text}"

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not JSON is still a failure for the caller.
        return f"Error: {response.text}"

    # `or {}` guards against an explicit null "output" field.
    output = result.get("output") or {}
    text = output.get("text")
    if not text:
        # With result_format="message" the reply is under choices[0].message.content.
        choices = output.get("choices") or []
        if choices:
            text = (choices[0].get("message") or {}).get("content")
    return text or "No output"

# Step 3: generate music (12 seconds by default) and explicitly release GPU memory.
def generate_music_from_text(music_desc, duration=12):
    """Generate audio from a text prompt with the loaded MusicGen model and save it as WAV.

    Args:
        music_desc: Text prompt passed to the MusicGen processor.
        duration: Target clip length in seconds. It is converted to a token budget
            using the audio encoder's frame rate, so the default of 12 seconds
            requests about the same length as the previous fixed ``max_length=600``
            (assuming the 50 Hz frame rate of the public MusicGen checkpoints —
            confirm for other checkpoints).

    Returns:
        str: Path of the written file, ``"generated_music.wav"``.
    """
    audio_cfg = model.config.audio_encoder
    # Derive the generation length from `duration` instead of ignoring the parameter.
    max_new_tokens = max(1, int(duration * audio_cfg.frame_rate))

    inputs = processor(text=[music_desc], padding=True, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)
    # outputs[0] is already a tensor; wrapping it in torch.tensor() only made an
    # extra copy and triggered a UserWarning.
    # NOTE(review): presumably shaped (channels, samples), as torchaudio.save expects — confirm.
    waveform = outputs[0].cpu()
    audio_path = "generated_music.wav"
    # Use the sampling rate the model was configured with instead of a hard-coded 32000.
    torchaudio.save(audio_path, waveform, audio_cfg.sampling_rate)

    # Drop tensor references before asking CUDA to release its cached blocks.
    del inputs, outputs, waveform
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    return audio_path

# Gradio front-end logic.
def gradio_generate(scene_description):
    """Click handler: scene text -> (music description, audio file path).

    Args:
        scene_description: Text from the scene input box.

    Returns:
        tuple: ``(text, audio_path)``. ``audio_path`` is ``None`` when no audio was
        produced; ``text`` then holds the message to display.
    """
    # Do not spend an API call and a generation run on empty input.
    if not scene_description or not scene_description.strip():
        return "请输入餐馆环境描述", None

    music_desc = generate_music_description(scene_description)
    # Failures come back as strings prefixed with "Error"; a prefix match avoids
    # rejecting a valid description that merely contains that word. "No output"
    # means Qwen returned no text, so there is nothing to feed to MusicGen.
    if music_desc.startswith("Error") or music_desc == "No output":
        return music_desc, None
    audio_path = generate_music_from_text(music_desc)
    return music_desc, audio_path

# Build the Gradio UI: one text input, a button, and two outputs (text + audio).
with gr.Blocks() as demo:
    gr.Markdown("## 🎵 AI背景音乐生成器")
    scene_input = gr.Textbox(
        label="🍽️ 餐馆环境描述",
        placeholder="如：温馨的意大利餐厅，适合情侣约会",
    )
    generate_button = gr.Button("🎶 生成音乐")
    music_output = gr.Textbox(label="🎵 Qwen生成的音乐描述")
    audio_output = gr.Audio(label="🎧 播放生成音乐", type="filepath")
    # The click handler fills the description box and the audio player.
    generate_button.click(
        fn=gradio_generate,
        inputs=[scene_input],
        outputs=[music_output, audio_output],
    )
# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)




Config of the text_encoder: <class 'transformers.models.t5.modeling_t5.T5EncoderModel'> is overwritten by shared text_encoder config: T5Config {
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "classifier_dropout": 0.0,
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dense_act_fn": "relu",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "relu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": false,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 12,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summ

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d14b4cc3068453dbbf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [2]:
#【✅=调meduim API=这个也可以但慢死】
# ✅ 安装依赖（首次运行）
!pip install gradio requests

import json
import random
import requests
import gradio as gr

# Credentials are read from environment variables so that real secrets never have
# to be saved in the notebook; the original placeholders are kept as fallbacks.
import os

# Qwen (Alibaba Cloud DashScope) API configuration.
QWEN_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "sk-")
QWEN_API_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"

# Hugging Face MusicGen API configuration.
HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
HF_API_URL = "https://api-inference.huggingface.co/models/facebook/musicgen-medium"
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Usage quota: maximum number of successful generations, tracked in call_counter.
DAILY_LIMIT = 10
call_counter = {"count": 0}

# Read the JSONL data: one {"scene": ..., "music": ...} object per line.
scene_music_data = []
with open("musicgen_scene_music_pairs.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        # A blank line is not valid JSON; skip it instead of letting json.loads("") raise.
        if line:
            scene_music_data.append(json.loads(line))

# Step 1: few-shot messages.
def build_few_shot_messages(k=2):
    """Return a chat-message list: one system prompt followed by up to ``k`` example turns.

    Each example is drawn at random from the module-level ``scene_music_data`` and
    contributes a ``user`` message ("Scene: <scene>") and an ``assistant`` message
    holding the example's ``music`` text.

    Args:
        k: Number of examples requested. It is clamped to the size of the pool, so a
            small or empty dataset yields fewer examples instead of making
            ``random.sample`` raise ``ValueError``.

    Returns:
        list[dict]: Messages with ``role`` and ``content`` keys, system message first.
    """
    k = max(0, min(k, len(scene_music_data)))
    messages = [{"role": "system", "content": "You are a music cognition expert converting restaurant scene descriptions into music prompts suitable for MusicGen."}]
    for ex in random.sample(scene_music_data, k):
        messages.append({"role": "user", "content": f"Scene: {ex['scene']}"})
        messages.append({"role": "assistant", "content": ex['music']})
    return messages

# Step 2: have Qwen generate the music description.
def generate_music_description(scene_desc):
    """Ask Qwen to turn a scene description into a music prompt.

    The few-shot messages from ``build_few_shot_messages`` are extended with the
    user's scene and posted to ``QWEN_API_URL``.

    Args:
        scene_desc: Free-text scene description supplied by the user.

    Returns:
        str: The generated text on success; ``"No output"`` when the response
        carries no text; otherwise a string starting with ``"Error: "`` that
        describes the failure (network problem, non-200 status, or non-JSON body).
    """
    messages = build_few_shot_messages(k=2)
    messages.append({"role": "user", "content": f"Scene: {scene_desc}"})
    headers = {
        "Authorization": f"Bearer {QWEN_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "qwen2.5-14b-instruct",
        "input": {"messages": messages},
        "parameters": {"temperature": 0.5, "max_tokens": 150}
    }
    try:
        # Without a timeout a stalled connection would block the UI callback forever.
        response = requests.post(QWEN_API_URL, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        return f"Error: {exc}"

    if response.status_code != 200:
        return f"Error: {response.text}"

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not JSON is still a failure for the caller.
        return f"Error: {response.text}"

    # `or {}` guards against an explicit null "output" field.
    output = result.get("output") or {}
    text = output.get("text")
    if not text:
        # With result_format="message" the reply is under choices[0].message.content.
        choices = output.get("choices") or []
        if choices:
            text = (choices[0].get("message") or {}).get("content")
    return text or "No output"

# Step 3: call the Hugging Face MusicGen API.
def generate_music_from_text(music_desc):
    """Request audio for a text prompt from the hosted MusicGen endpoint and save it to disk.

    Args:
        music_desc: Text prompt sent as the ``inputs`` field of the request.

    Returns:
        str: On success, the path of the saved file (``"musicgen_output.wav"``).
        tuple: On failure, ``(message, None)`` — quota exhausted, request error,
        or a non-200 response.
    """
    import datetime

    # DAILY_LIMIT is a per-day quota, so start a fresh count when the calendar day
    # changes; previously the counter only reset when the kernel restarted.
    today = datetime.date.today().isoformat()
    if call_counter.get("date") != today:
        call_counter["date"] = today
        call_counter["count"] = 0

    if call_counter["count"] >= DAILY_LIMIT:
        return "已达到今日调用上限，请明日再试或升级配额", None

    try:
        # Hosted generation can be slow; a generous timeout still bounds the wait.
        response = requests.post(
            HF_API_URL, headers=HF_HEADERS, json={"inputs": music_desc}, timeout=300
        )
    except requests.RequestException as exc:
        return f"API Error: {exc}", None

    if response.status_code != 200:
        return f"API Error: {response.status_code} - {response.text}", None

    # NOTE(review): the body is written verbatim under a .wav name; confirm the
    # endpoint actually returns WAV data rather than another audio container.
    audio_path = "musicgen_output.wav"
    with open(audio_path, "wb") as f:
        f.write(response.content)
    # Only successful generations count against the quota.
    call_counter["count"] += 1
    return audio_path

# Gradio handler.
def gradio_generate(scene_description):
    """Click handler: scene text -> (music description or message, audio file path).

    Args:
        scene_description: Text from the scene input box.

    Returns:
        tuple: ``(text, audio_path)``. ``audio_path`` is ``None`` when no audio was
        produced; ``text`` then holds the message to display.
    """
    # Do not spend API calls (or quota) on empty input.
    if not scene_description or not scene_description.strip():
        return "请输入餐馆环境描述", None

    music_desc = generate_music_description(scene_description)
    # Failures come back as strings prefixed with "Error"; a prefix match avoids
    # rejecting a valid description that merely contains that word. "No output"
    # means Qwen returned no text, so there is nothing to send to MusicGen.
    if music_desc.startswith("Error") or music_desc == "No output":
        return music_desc, None

    result = generate_music_from_text(music_desc)
    # generate_music_from_text signals failure (including the quota message) with
    # a (message, None) tuple and success with a plain path string.
    if isinstance(result, tuple):
        return result[0], None
    return music_desc, result

# Gradio page: one text input, a button, and two outputs (text + audio).
with gr.Blocks() as demo:
    gr.Markdown("## 🎵 Hugging Face API 版音乐生成器")
    scene_input = gr.Textbox(
        label="🍽️ 餐馆环境描述",
        placeholder="如：温馨的意大利餐厅，适合情侣约会",
    )
    generate_button = gr.Button("🎶 生成音乐")
    music_output = gr.Textbox(label="🎵 Qwen 生成的音乐描述")
    audio_output = gr.Audio(label="🎧 播放生成音乐", type="filepath")
    # The click handler fills the description box and the audio player.
    generate_button.click(
        fn=gradio_generate,
        inputs=[scene_input],
        outputs=[music_output, audio_output],
    )

# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8d601a8ba377a6f0f4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


