In [None]:
# 安裝必要的庫
!pip install opencv-python moviepy gradio diffusers transformers torch torchvision torchaudio vits

import cv2
import numpy as np
from moviepy.editor import ImageSequenceClip, concatenate_videoclips, TextClip, CompositeVideoClip, AudioFileClip
import gradio as gr
from diffusers import StableDiffusionImg2ImgPipeline
from transformers import VitsModel, VitsTokenizer
import torch

# 1. 漫畫框格切分與儲存
def segment_panels(image, min_area_ratio=0.01):
    """Split a comic page into panels via Otsu thresholding and contours.

    The page is binarised with an inverted Otsu threshold, external contours
    are located, and the bounding rectangle of each contour is cropped out.

    Args:
        image: Page as a 3-channel array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``. ``None`` or an empty array yields no panels.
        min_area_ratio: Bounding boxes covering less than this fraction of
            the page area are discarded as noise (specks, lettering, stray
            marks). Pass ``0`` to keep every contour.

    Returns:
        A list of ``(panel, (x, y))`` tuples, where ``panel`` is a view into
        ``image`` and ``(x, y)`` is its top-left corner. Panels are ordered
        row by row from top to bottom, and left to right within a row.
        NOTE(review): right-to-left manga would need the in-row order
        reversed; confirm the intended reading direction.
    """
    # Nothing to segment: report "no panels" rather than failing in OpenCV.
    if image is None or image.size == 0:
        return []

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    page_height, page_width = image.shape[:2]
    min_area = min_area_ratio * page_height * page_width
    boxes = [cv2.boundingRect(contour) for contour in contours]
    boxes = [box for box in boxes if box[2] * box[3] >= min_area]

    # The contour order returned by OpenCV is not reading order, but callers
    # pair panels with prompts positionally, so sort explicitly. Boxes whose
    # top edge lies above the vertical midpoint of the first box in the
    # current row are treated as belonging to that row.
    boxes.sort(key=lambda box: box[1])
    rows = []
    for box in boxes:
        if rows and box[1] < rows[-1][0][1] + rows[-1][0][3] / 2:
            rows[-1].append(box)
        else:
            rows.append([box])

    ordered = [box for row in rows for box in sorted(row, key=lambda b: b[0])]
    return [(image[y:y + h, x:x + w], (x, y)) for x, y, w, h in ordered]

# 2. 圖片轉動畫 (img2vid) 使用 Stable Diffusion
# Load the Stable Diffusion img2img pipeline once; img_to_video reuses it.
# NOTE(review): confirm this checkpoint id is still available on the Hub.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
# Use the GPU when one is present, otherwise fall back to CPU instead of
# raising on machines without CUDA.
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")

def img_to_video(panel, prompt, num_frames=30):
    """Generate animation frames for one panel with the img2img pipeline.

    Each frame is an independent img2img sample of ``panel`` guided by
    ``prompt`` (strength 0.75, 50 inference steps).

    Args:
        panel: Source panel as an array. Integer-typed arrays are assumed to
            hold 0-255 pixel values and are rescaled to [0, 1]; float arrays
            are passed through unchanged.
        prompt: Text prompt forwarded to the pipeline.
        num_frames: Number of frames to generate.

    Returns:
        A list of ``num_frames`` arrays converted with ``COLOR_RGB2BGR``,
        i.e. in OpenCV channel order.
    """
    # The diffusers img2img pipeline documents numpy/tensor inputs as being
    # in the [0, 1] range, so convert 8-bit pixels once, outside the loop.
    init_image = np.asarray(panel)
    if np.issubdtype(init_image.dtype, np.integer):
        init_image = init_image.astype(np.float32) / 255.0

    frames = []
    for _ in range(num_frames):
        result = pipe(prompt, image=init_image, strength=0.75, num_inference_steps=50)
        frames.append(cv2.cvtColor(np.array(result.images[0]), cv2.COLOR_RGB2BGR))
    return frames

# 3. 生成中文字幕與台詞音效
# Text-to-speech model and its tokenizer, both loaded from one checkpoint id.
# NOTE(review): confirm this checkpoint id exists on the Hugging Face Hub.
_VITS_CHECKPOINT = "facebook/vits-hubert-large"
tokenizer = VitsTokenizer.from_pretrained(_VITS_CHECKPOINT)
model = VitsModel.from_pretrained(_VITS_CHECKPOINT)

def generate_subtitle_audio(dialogue, duration=2, font=None):
    """Synthesize speech for a line of dialogue and build its subtitle clip.

    Args:
        dialogue: Text to speak and to show as the subtitle.
        duration: Length in seconds given to both returned clips.
        font: Optional font name forwarded to ``TextClip``. When omitted,
            ``TextClip``'s own default is used.
            NOTE(review): the default font may lack CJK glyphs; pass a font
            that covers the dialogue's script if subtitles render as boxes.

    Returns:
        A tuple ``(audio_clip, subtitle_clip)``, each set to ``duration``.
    """
    # Imported here because the file-level moviepy import does not expose it.
    from moviepy.audio.AudioClip import AudioArrayClip

    inputs = tokenizer(dialogue, return_tensors="pt")
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # The transformers VITS docs give the waveform as (batch, samples); take
    # the single utterance and move it to host memory.
    samples = waveform[0].detach().cpu().numpy().astype(np.float32)
    # AudioArrayClip wants an (n_samples, n_channels) array, not a file path;
    # duplicate the mono signal into two channels.
    stereo = np.column_stack((samples, samples))
    # Prefer the checkpoint's own rate; fall back to the previous constant.
    sample_rate = getattr(model.config, "sampling_rate", 22050)
    audio_clip = AudioArrayClip(stereo, fps=sample_rate).set_duration(duration)

    text_kwargs = {"fontsize": 40, "color": "white", "bg_color": "black"}
    if font is not None:
        text_kwargs["font"] = font
    # Without a duration the subtitle makes any composite clip's duration
    # undefined, which later breaks concatenation.
    subtitle_clip = TextClip(dialogue, **text_kwargs).set_duration(duration)
    return audio_clip, subtitle_clip

# 4. 拼接影片
def create_comic_video(image, prompts, dialogues):
    """Turn a comic page into one video: a short animated clip per panel.

    Panels from ``segment_panels`` are paired positionally with ``prompts``
    and ``dialogues``; pairing stops at the shortest of the three sequences.

    Args:
        image: Comic page array handed to ``segment_panels``.
        prompts: One img2img prompt per panel.
        dialogues: One line of dialogue per panel. A blank or ``None`` entry
            leaves that panel silent and without a subtitle.

    Returns:
        The concatenated moviepy clip.

    Raises:
        ValueError: If no panel could be paired with a prompt and dialogue.
    """
    video_clips = []
    for (panel, _), prompt, dialogue in zip(segment_panels(image), prompts, dialogues):
        # img_to_video returns frames in OpenCV (BGR) order, whereas
        # ImageSequenceClip treats arrays as RGB; swap back so colors are
        # not inverted in the output.
        frames = [
            cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            for frame in img_to_video(panel, prompt)
        ]
        clip = ImageSequenceClip(frames, fps=15)

        if dialogue and dialogue.strip():
            audio, subtitle = generate_subtitle_audio(dialogue, clip.duration)
            # Give the subtitle an explicit duration so the composite clip
            # has a defined length for concatenation.
            subtitle = subtitle.set_duration(clip.duration)
            subtitle = subtitle.set_position(('center', 'bottom'))
            video = CompositeVideoClip([clip, subtitle]).set_audio(audio)
        else:
            # Nothing to speak or display for this panel.
            video = clip
        video_clips.append(video)

    if not video_clips:
        raise ValueError(
            "No panels to render: no panels were detected, or no prompts/dialogues were given."
        )
    return concatenate_videoclips(video_clips, method="compose")

# 5. Gradio UI 整合
def process_comic(image, prompt1, prompt2, prompt3, dialogue1, dialogue2, dialogue3):
    """Gradio callback: render the uploaded comic page to ``output.mp4``.

    Args:
        image: Uploaded page as a numpy array, or ``None`` if nothing was
            uploaded.
        prompt1, prompt2, prompt3: Prompts for the first three panels.
        dialogue1, dialogue2, dialogue3: Dialogue for the first three panels.

    Returns:
        The path of the written video file, ``"output.mp4"``.

    Raises:
        ValueError: If no image was provided.
    """
    # Fail with a clear message before any model work is attempted.
    if image is None:
        raise ValueError("Please upload a comic page image before generating a video.")

    prompts = [prompt1, prompt2, prompt3]
    dialogues = [dialogue1, dialogue2, dialogue3]
    output_path = "output.mp4"

    video = create_comic_video(image, prompts, dialogues)
    try:
        video.write_videofile(output_path, fps=15)
    finally:
        # Release the clip's resources even if encoding fails.
        video.close()
    return output_path

# Gradio form: one image upload, then three prompt boxes followed by three
# dialogue boxes, in the positional order process_comic expects.
interface = gr.Interface(
    fn=process_comic,
    inputs=[
        gr.Image(type="numpy"),
        *(gr.Textbox(label=f"Prompt for Panel {n}") for n in (1, 2, 3)),
        *(gr.Textbox(label=f"Dialogue for Panel {n}") for n in (1, 2, 3)),
    ],
    outputs=gr.Video(label="Generated Video"),
)

# Start the Gradio interface.
interface.launch()
