In [None]:
# ==========================================
# Cell 1: Path setup and API token activation
# ==========================================

from pathlib import Path
import os, sys

# ---- Project root ----
# Work from the project root and make the `src` package importable.
PROJECT_ROOT = Path("/workspace/baseball_pipeline")
os.chdir(PROJECT_ROOT)

if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

print("PROJECT_ROOT:", PROJECT_ROOT)

# ---- Data directories ----
from src import DATA_DIR, FAISS_DIR, FISH_ROOT

INPUT_VIDEO_DIR = DATA_DIR / "input_videos"
STT_RAW_DIR = DATA_DIR / "stt_raw"
STT_SEG_DIR = DATA_DIR / "stt_segments"
LLM_OUT_DIR = DATA_DIR / "llm_outputs"
TTS_AUDIO_DIR = DATA_DIR / "tts_audio"
OUTPUT_VIDEO_DIR = DATA_DIR / "output_videos"
DEMUCS_ROOT = DATA_DIR / "demucs"
FRAMES_ROOT = DATA_DIR / "frames"
SRC_ROOT = PROJECT_ROOT / "src"

if str(SRC_ROOT) not in sys.path:
    sys.path.append(str(SRC_ROOT))

# Create every directory the pipeline writes into (no-op when present).
for d in (DATA_DIR, INPUT_VIDEO_DIR, STT_RAW_DIR, STT_SEG_DIR,
          LLM_OUT_DIR, TTS_AUDIO_DIR, OUTPUT_VIDEO_DIR, DEMUCS_ROOT,
          FRAMES_ROOT, FAISS_DIR, SRC_ROOT):
    d.mkdir(parents=True, exist_ok=True)

print("\n✅ 디렉토리 생성 완료")

# ---- API tokens ----
# Secrets are taken from the environment so that no real key has to be saved
# in the notebook. A value may still be typed as the second argument for a
# one-off local run; placeholder text such as "sk-proj-..." is ignored so it
# can never overwrite a real key that is already exported.
_PLACEHOLDER_MARKERS = ("xxx", "...")


def _resolve_secret(env_name: str, inline_value: str = "") -> str:
    """Return `inline_value` if it is a real value, else the env var (or "")."""
    candidate = inline_value.strip()
    if candidate and not any(marker in candidate for marker in _PLACEHOLDER_MARKERS):
        return candidate
    return os.environ.get(env_name, "").strip()


CLOVA_INVOKE_URL = _resolve_secret("CLOVA_INVOKE_URL", "")
CLOVA_SECRET_KEY = _resolve_secret("CLOVA_SECRET_KEY", "")

HF_TOKEN = _resolve_secret("HF_TOKEN", "")
OPENAI_API_KEY = _resolve_secret("OPENAI_API_KEY", "")  # fill in only if needed

if HF_TOKEN:
    # Export under all three variable names, as the original cell did.
    for _hf_var in ("HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        os.environ[_hf_var] = HF_TOKEN

if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

print("✅ API 토큰 설정 완료\n")


PROJECT_ROOT: /workspace/baseball_pipeline

✅ 디렉토리 생성 완료
✅ API 토큰 설정 완료



In [2]:
import re
import gdown

def download_gdrive_video(gdrive_url: str, dest_name: str | None = None) -> Path:
    """구글 드라이브 공유 링크에서 영상 다운로드"""
    m = re.search(r"id=([a-zA-Z0-9_-]+)", gdrive_url)
    if not m:
        m = re.search(r"/d/([a-zA-Z0-9_-]+)/", gdrive_url)
    
    if not m:
        raise ValueError(f"구글 드라이브 링크에서 file id를 찾지 못했습니다.\n현재 url: {gdrive_url}")
    
    file_id = m.group(1)
    
    if dest_name is None:
        dest_name = f"{file_id}.mp4"
    
    out_path = INPUT_VIDEO_DIR / dest_name
    
    if out_path.exists():
        print(f"[GDRIVE] 파일이 이미 존재합니다: {out_path}")
        return out_path
    
    url = f"https://drive.google.com/uc?id={file_id}"
    print("[GDRIVE] file_id:", file_id)
    print("[GDRIVE] url    :", url)
    print("[GDRIVE] output :", out_path)
    
    gdown.download(url, str(out_path), quiet=False)
    
    print("[GDRIVE] 다운로드 완료:", out_path)
    return out_path


# ====== Enter the Google Drive share link here ======
gdrive_url = "https://drive.google.com/file/d/1SyQ47qpTjsE3MqEWAqNnlgWUrlfD14gC/view?usp=sharing"
VIDEO_NAME = "SSG_삼성_10_14_2025_준플레이오프_4차전.mp4"

# Fetch the video (or reuse the local copy), then derive the extension-less
# stem that the later cells use to name their output files.
local_video_path = download_gdrive_video(gdrive_url, dest_name=VIDEO_NAME)
video_stem = Path(VIDEO_NAME).stem

print(
    f"\n✅ 영상 다운로드 완료",
    f"  video_stem: {video_stem}",
    f"  경로: {local_video_path}\n",
    sep="\n",
)


[GDRIVE] file_id: 1SyQ47qpTjsE3MqEWAqNnlgWUrlfD14gC
[GDRIVE] url    : https://drive.google.com/uc?id=1SyQ47qpTjsE3MqEWAqNnlgWUrlfD14gC
[GDRIVE] output : /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4


Downloading...
From (original): https://drive.google.com/uc?id=1SyQ47qpTjsE3MqEWAqNnlgWUrlfD14gC
From (redirected): https://drive.google.com/uc?id=1SyQ47qpTjsE3MqEWAqNnlgWUrlfD14gC&confirm=t&uuid=338f5a55-a1ac-4cbb-aded-7953aaaf0911
To: /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4
100%|██████████| 494M/494M [00:11<00:00, 43.3MB/s] 


[GDRIVE] 다운로드 완료: /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4

✅ 영상 다운로드 완료
  video_stem: SSG_삼성_10_14_2025_준플레이오프_4차전
  경로: /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4



In [4]:
# ==========================================
# Cell 3: Demucs vocal separation
# ==========================================

from src.demucs import separate_video_with_demucs

print(
    f"[DEMUCS] 음성 분리 시작: {VIDEO_NAME}",
    f"  모델: htdemucs",
    f"  device: cuda",
    sep="\n",
)

# Separate the input video's audio; the call returns the directory that
# holds the resulting stem files.
demucs_kwargs = dict(video_name=VIDEO_NAME, device="cuda", model="htdemucs")
track_dir = separate_video_with_demucs(**demucs_kwargs)

# Stem files this notebook expects inside the returned directory.
vocals_path, no_vocals_path = (
    track_dir / stem_file for stem_file in ("vocals.wav", "no_vocals.wav")
)

print(
    f"\n✅ 음성 분리 완료!",
    f"  - vocals.wav    : {vocals_path}",
    f"  - no_vocals.wav : {no_vocals_path}\n",
    sep="\n",
)


[DEMUCS] 음성 분리 시작: SSG_삼성_10_14_2025_준플레이오프_4차전.mp4
  모델: htdemucs
  device: cuda
[ffmpeg] ffmpeg -y -i /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4 -ac 2 -ar 44100 /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/input_wav/SSG_삼성_10_14_2025_준플레이오프_4차전.wav


ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
  configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --ena

[demucs] /root/miniconda3/envs/skn17_final_env/bin/python -m demucs.separate -n htdemucs --two-stems=vocals -d cuda -o /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/input_wav/SSG_삼성_10_14_2025_준플레이오프_4차전.wav
===== demucs stdout =====
Selected model is a bag of 1 models. You will see that many progress bars per track.
Separated tracks will be stored in /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs/htdemucs
Separating track /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/input_wav/SSG_삼성_10_14_2025_준플레이오프_4차전.wav

===== demucs stderr =====
Downloading: "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th" to /root/.cache/torch/hub/checkpoints/955717e8-8726e21a.th

  0%|          | 0.00/80.2M [00:00<?, ?B/s]
 48%|████▊     | 38.6M/80.2M [00:00<00:00, 405MB/s]
 96%|█████████▋| 77.2M/80.2M [00:00<00:00, 395MB/s]
100%|██████████| 80

In [5]:
# ==========================================
# Cell 4: STT (Clova Speech API)
# ==========================================

import traceback

from src.stt_pipeline import run_stt_pipeline

print(f"[STT] Clova STT 시작", f"  입력: {vocals_path}", sep="\n")

# Keyword boosting: pass the project-level stt.xlsx when it exists,
# otherwise switch the pipeline's domain boostings on instead.
STT_KEYWORD_XLSX = PROJECT_ROOT / "stt.xlsx"
xlsx_path = STT_KEYWORD_XLSX if STT_KEYWORD_XLSX.exists() else None
use_domain = xlsx_path is None
print(
    "  키워드: 도메인 부스팅만 사용"
    if use_domain
    else "  키워드: stt.xlsx 사용 (엑셀 부스팅)"
)

stt_request = dict(
    audio_path=vocals_path,
    invoke_url=CLOVA_INVOKE_URL,
    secret_key=CLOVA_SECRET_KEY,
    stt_raw_dir=STT_RAW_DIR,
    stt_seg_dir=STT_SEG_DIR,
    xlsx_keywords_path=xlsx_path,
    use_domain_boostings=use_domain,
    speaker_count_min=2,
    speaker_count_max=3,
    save_raw_json=True,
)

try:
    tts_csv_path, timeline_json_path = run_stt_pipeline(**stt_request)
except Exception as exc:
    # Show a short message plus the traceback, then re-raise so the failure
    # still stops execution of the notebook.
    print(f"\n❌ STT 실패: {exc}")
    traceback.print_exc()
    raise
else:
    print(
        f"\n✅ STT 완료!",
        f"  - tts_phrases.csv : {tts_csv_path}",
        f"  - timeline.json   : {timeline_json_path}\n",
        sep="\n",
    )

[STT] Clova STT 시작
  입력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs/htdemucs/SSG_삼성_10_14_2025_준플레이오프_4차전/vocals.wav
  키워드: stt.xlsx 사용 (엑셀 부스팅)
[STT_PIPELINE] Clova STT 요청 시작: /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs/htdemucs/SSG_삼성_10_14_2025_준플레이오프_4차전/vocals.wav
[STT_PIPELINE] raw JSON 저장 -> /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_raw/vocals.clova_raw.json
[STT_PIPELINE] phrase CSV 저장 -> /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/vocals.tts_phrases.csv (rows=253)
[STT_PIPELINE] timeline JSON 저장 -> /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/vocals.timeline.json

✅ STT 완료!
  - tts_phrases.csv : /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/vocals.tts_phrases.csv
  - timeline.json   : /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/vocals.timeline.json



In [7]:
# ==========================================
# Cell 5: STT data preprocessing
# ==========================================

import traceback

from src.stt_utter_preprocess import build_utterances_from_clova_raw

# The raw STT JSON is looked up by the audio file's stem, while the
# utterance CSV is named after the video.
vocals_stem = vocals_path.stem
raw_json_path = STT_RAW_DIR / f"{vocals_stem}.clova_raw.json"
utter_csv_path = STT_SEG_DIR / f"{video_stem}.utterances.csv"

print(
    f"[PREPROCESS] STT 전처리 시작",
    f"  raw_json: {raw_json_path}",
    f"  output: {utter_csv_path}",
    sep="\n",
)

try:
    utter_csv = build_utterances_from_clova_raw(
        raw_json_path=raw_json_path,
        out_csv_path=utter_csv_path,
    )
except Exception as exc:
    # Report, dump the traceback, and re-raise to halt the notebook.
    print(f"\n❌ 전처리 실패: {exc}")
    traceback.print_exc()
    raise
else:
    print(f"\n✅ 전처리 완료: {utter_csv}\n")


[PREPROCESS] STT 전처리 시작
  raw_json: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_raw/vocals.clova_raw.json
  output: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/SSG_삼성_10_14_2025_준플레이오프_4차전.utterances.csv
[PREPROCESS] STT 전처리 시작
  입력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_raw/vocals.clova_raw.json
  출력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/SSG_삼성_10_14_2025_준플레이오프_4차전.utterances.csv
  - 총 segments: 135개
  - Speaker 통계: {'1': 41, '2': 79, '3': 15}
  - 캐스터 자동 할당: 2
[PREPROCESS] 완료!
  - 총 utterances: 88개
  - 캐스터: 52개
  - 해설: 36개
  - 출력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/SSG_삼성_10_14_2025_준플레이오프_4차전.utterances.csv

✅ 전처리 완료: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/SSG_삼성_10_14_2025_준플레이오프_4차전.utterances.csv



In [9]:
# ==========================================
# Cell 6: VLM scoreboard extraction (optimized version)
# ==========================================

import traceback

from src.utter_vlm_scoreboard import build_vlm_scoreboard_for_utterances

print(
    f"[VLM] 스코어보드 추출 시작 (5초 미만 최적화)",
    f"  utterances: {utter_csv}",
    f"  video: {local_video_path}",
    sep="\n",
)

vlm_csv_path = LLM_OUT_DIR / f"{video_stem}.utter_vlm_scoreboard.csv"

vlm_request = dict(
    utter_csv_path=utter_csv,
    video_path=local_video_path,
    frames_root_dir=FRAMES_ROOT,
    output_csv_path=vlm_csv_path,
    duration_threshold=5.0,  # utterances under 5 s get a single mid-point frame
    resume=True,
)

try:
    vlm_csv = build_vlm_scoreboard_for_utterances(**vlm_request)
except Exception as exc:
    # Report, dump the traceback, and re-raise to halt the notebook.
    print(f"\n❌ VLM 추출 실패: {exc}")
    traceback.print_exc()
    raise
else:
    print(f"\n✅ VLM 스코어보드 추출 완료: {vlm_csv}\n")
# NOTE(review): the original trailing note here was just "20" — presumably
# the runtime in minutes; confirm.

[02:33:19] INFO: VLM 스코어보드 추출 시작
[02:33:19] INFO: 입력 CSV: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/SSG_삼성_10_14_2025_준플레이오프_4차전.utterances.csv
[02:33:19] INFO: 비디오: /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4
[02:33:19] INFO: 프레임 디렉토리: /workspace/skn17_final_runpod_code/baseball_pipeline/data/frames
[02:33:19] INFO: 출력 CSV: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.utter_vlm_scoreboard.csv
[02:33:19] INFO: 단일 프레임 기준: 5.0초
[02:33:19] INFO: 총 88개 utterance
[02:33:19] INFO: 프레임 캡처 시작...


[VLM] 스코어보드 추출 시작 (5초 미만 최적화)
  utterances: /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/SSG_삼성_10_14_2025_준플레이오프_4차전.utterances.csv
  video: /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4


[02:33:20] INFO: 비디오 정보: FPS=59.94, 총 프레임=56486
[02:33:20] INFO: [1/88] utter_0: dur=22.7s (길음) → 3장
[h264 @ 0xb0e8b80] mmco: unref short failure
[h264 @ 0xb0e8b80] mmco: unref short failure
[02:33:20] INFO: [2/88] utter_1: dur=25.8s (길음) → 3장
[02:33:22] INFO: [3/88] utter_2: dur=12.5s (길음) → 3장
[02:33:23] INFO: [4/88] utter_3: dur=5.7s (길음) → 3장
[02:33:24] INFO: [5/88] utter_4: dur=13.0s (길음) → 3장
[02:33:25] INFO: [6/88] utter_5: dur=4.2s (짧음) → 중간 1장
[02:33:26] INFO: [7/88] utter_6: dur=4.1s (짧음) → 중간 1장
[02:33:26] INFO: [8/88] utter_7: dur=8.0s (길음) → 3장
[02:33:27] INFO: [9/88] utter_8: dur=7.4s (길음) → 3장
[02:33:29] INFO: [10/88] utter_9: dur=3.5s (짧음) → 중간 1장
[02:33:29] INFO: [11/88] utter_10: dur=4.5s (짧음) → 중간 1장
[02:33:29] INFO: [12/88] utter_11: dur=7.2s (길음) → 3장
[02:33:31] INFO: [13/88] utter_12: dur=10.3s (길음) → 3장
[02:33:32] INFO: [14/88] utter_13: dur=9.7s (길음) → 3장
[02:33:33] INFO: [15/88] utter_14: dur=18.7s (길음) → 3장
[02:33:35] INFO: [16/88] utter_15: dur=5.2s (길음) → 3장


✅ VLM 스코어보드 추출 완료: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.utter_vlm_scoreboard.csv



In [10]:
# ==========================================
# Cell 7: Commentary dialogue generation
# ==========================================

import importlib
import traceback

import pandas as pd

import src.commentary_dialogue
# Reload so that edits to the module are picked up without a kernel restart.
importlib.reload(src.commentary_dialogue)
from src.commentary_dialogue import create_dialogue_commentary

# Derive the game description from VIDEO_NAME.
# e.g. "SSG_삼성_10_14_2025_준플레이오프_4차전.mp4"
#   -> "2025 준플레이오프 4차전 SSG vs 삼성"
# Expected layout: <team1>_<team2>_<MM>_<DD>_<YYYY>_<series>_<game>

video_parts = video_stem.split('_')

# Always define the individual fields (empty / default when absent) so the
# same names exist regardless of which branch builds GAME_DESC.
team1 = video_parts[0] if len(video_parts) > 0 else ""
team2 = video_parts[1] if len(video_parts) > 1 else ""
year = video_parts[4] if len(video_parts) > 4 else "2025"
series = video_parts[5] if len(video_parts) > 5 else ""
game_num = video_parts[6] if len(video_parts) > 6 else ""

if len(video_parts) >= 7:
    GAME_DESC = f"{year} {series} {game_num} {team1} vs {team2}"
    print(f"[INFO] 자동 생성된 경기 설명: {GAME_DESC}")
else:
    # The file name does not follow the expected layout: fall back to the
    # plain file name instead of emitting a description with missing fields.
    GAME_DESC = video_stem.replace('_', ' ')
    print(f"[INFO] 파일명 기반 경기 설명: {GAME_DESC}")

dialogue_csv_path = LLM_OUT_DIR / f"{video_stem}.dialogue_commentary.csv"

print(f"\n[DIALOGUE] 캐스터-해설 대화 생성 시작")
print(f"  경기: {GAME_DESC}")
print(f"  입력: {vlm_csv}")

try:
    dialogue_csv = create_dialogue_commentary(
        vlm_csv_path=vlm_csv,
        output_csv_path=dialogue_csv_path,
        game_desc=GAME_DESC,
        min_duration_for_analyst=5.0,
    )

    print(f"\n✅ 대화 생성 완료: {dialogue_csv}\n")

    # Preview of the result.
    result_df = pd.read_csv(dialogue_csv)

    # read_csv loads empty cells as NaN, and NaN != '' evaluates to True, so
    # normalise to stripped strings before testing for "has analyst text".
    has_analyst = result_df['analyst'].fillna('').astype(str).str.strip() != ''

    print(f"📊 생성 통계:")
    print(f"  - 총 구간: {len(result_df)}개")
    print(f"  - 해설위원 발화: {has_analyst.sum()}개")
    print(f"  - 캐스터만: {(~has_analyst).sum()}개\n")

except Exception as e:
    # Report, dump the traceback, and re-raise to halt the notebook.
    print(f"\n❌ 대화 생성 실패: {e}")
    traceback.print_exc()
    raise

# Takes about 7 minutes.

[INFO] 자동 생성된 경기 설명: 2025 준플레이오프 4차전 SSG vs 삼성

[DIALOGUE] 캐스터-해설 대화 생성 시작
  경기: 2025 준플레이오프 4차전 SSG vs 삼성
  입력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.utter_vlm_scoreboard.csv
[DIALOGUE] 캐스터-해설 대화 생성
입력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.utter_vlm_scoreboard.csv
출력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.dialogue_commentary.csv
해설 최소 구간: 5.0초
[DIALOGUE] 총 88개 moment 로드
[DIALOGUE] 211개 주요 이벤트 감지
[DIALOGUE] loading base model on cuda ...


`torch_dtype` is deprecated! Use `dtype` instead!
[03:00:52] INFO: We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.17s/it]


[DIALOGUE] loading LoRA: SeHee8546/kanana-1.5-8b-pakchanho-lora
[DIALOGUE] model ready.

[1/88] t=0:00:07 (dur=22.7s, utter_id=0)
  캐스터 (10.2s): 이호성이 이 리드를 지켜냅니다. 끝나 이런 경기가 있습니다. 오중간을 갈라놓습니다....
  해설   (12.5s): 아직 한 방만 나오면 됩니다. 이제는 어떻게든지 아웃 카운트 하나라도 잡고, 점수를 내줬으면 다시 우리 쪽으로 넘겨주는 그런 ...

[2/88] t=0:00:30 (dur=25.8s, utter_id=1)
  캐스터 (47.1s): 1번 타자 박성한 2번 에레디아, 3번 타자 최정 4번 한유성, 5번 고명 등 수많은 한국 시리즈 무대를 올라섰던 김광현 선수...
  해설   (-21.3s): 김광현이라는 투수는 포크볼과 체인지업 위주로 변화구 피칭을 하는 투수로 유명했습니다. 그래서 상대 타자가 공을 기다리는 타이밍...

[3/88] t=0:00:55 (dur=12.5s, utter_id=2)
  캐스터 (3.8s): 태안 사람 태안으로 길이 보전하...
  해설   (8.7s): 네, 자, 어, 어제도 한번 말씀 드렸었는데요. 자, 어, 원래 이닝마다의 제일 중요한 순간이 바로 이 첫 번째 타자를 상대를...

[4/88] t=0:01:15 (dur=5.7s, utter_id=3)
  캐스터 (5.8s): 에레디아가 후라도 상대 홈런이 있습니다. 신선진...
  해설   (-0.0s): 네. 자, 우선 투수와 타자는 서로 눈빛이 통했다고 할까요? 그렇지는 않았겠죠. 왜냐하면은 갑자기 이렇게 투수가 바뀌었으니까요...

[5/88] t=0:01:23 (dur=13.0s, utter_id=4)
  캐스터 (13.6s): 지금 에레디아 최정, 한유섬의 스윙 삼진 연속 탈삼진 1회를 삼자 범퇴로 시작을 하고 있는 쿠라도 선수입니다....
  해설   (-0.6s): 쿠라도라는 투수가 이번 포

In [11]:
# ==========================================
# Cell 8: Data format conversion (adapter)
# ==========================================

import traceback

from src.dialogue_to_tts_adapter import convert_dialogue_to_tts_format

tts_format_csv_path = LLM_OUT_DIR / f"{video_stem}.tts_phrases.from_dialogue.csv"

print(f"[ADAPTER] 데이터 형식 변환 시작", f"  video_stem: {video_stem}", sep="\n")

try:
    tts_format_csv = convert_dialogue_to_tts_format(
        dialogue_csv_path=dialogue_csv,
        output_csv_path=tts_format_csv_path,
        video_stem=video_stem,
    )
    print(f"\n✅ 형식 변환 완료: {tts_format_csv}\n")

    # Summarise the converted CSV: row count, source video, rows per role.
    import pandas as pd
    converted_df = pd.read_csv(tts_format_csv)
    roles = converted_df['role']
    print(
        f"📊 변환 결과:",
        f"  - 총 utterances: {len(converted_df)}개",
        f"  - source_video: {converted_df['source_video'].iloc[0]}",
        f"  - 캐스터: {roles.eq('caster').sum()}개",
        f"  - 해설: {roles.eq('analyst').sum()}개\n",
        sep="\n",
    )

except Exception as exc:
    # Report, dump the traceback, and re-raise to halt the notebook.
    print(f"\n❌ 형식 변환 실패: {exc}")
    traceback.print_exc()
    raise


[ADAPTER] 데이터 형식 변환 시작
  video_stem: SSG_삼성_10_14_2025_준플레이오프_4차전
[ADAPTER] 데이터 형식 변환 시작
  입력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.dialogue_commentary.csv
  video_stem: SSG_삼성_10_14_2025_준플레이오프_4차전
[ADAPTER] 변환 완료:
  출력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_phrases.from_dialogue.csv
  총 moments: 88개
  총 utterances: 111개
    - 캐스터: 88개
    - 해설: 23개

✅ 형식 변환 완료: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_phrases.from_dialogue.csv

📊 변환 결과:
  - 총 utterances: 111개
  - source_video: SSG_삼성_10_14_2025_준플레이오프_4차전.mp4
  - 캐스터: 88개
  - 해설: 23개



In [13]:
# ==========================================
# Cell 9: TTS audio generation (Fish-Speech API)
# ⚠️ Prerequisite: the Fish-Speech server must already be running
#    in a separate terminal.
# ==========================================

import traceback

from src.tts_fishspeech_api import run_tts_batch_via_api

FISH_API_URL = "http://127.0.0.1:8080/v1/tts"

# Reference recordings handed to the TTS call for each speaker role.
TTS_REF_DIR = DATA_DIR / "tts_refs"
CASTER_REF_WAVS = [TTS_REF_DIR / "caster_prompt_1.wav"]
ANALYST_REF_WAVS = [TTS_REF_DIR / "analyst_pakchanho_prompt_1.wav"]

print(
    f"[TTS] Fish-Speech TTS 시작",
    f"  API URL: {FISH_API_URL}",
    f"  입력 CSV: {tts_format_csv}",
    f"  캐스터 참조: {CASTER_REF_WAVS}",
    f"  해설 참조: {ANALYST_REF_WAVS}",
    sep="\n",
)

try:
    tts_csv_with_paths = run_tts_batch_via_api(
        tts_csv_path=tts_format_csv,
        caster_ref_wavs=CASTER_REF_WAVS,
        analyst_ref_wavs=ANALYST_REF_WAVS,
        api_url=FISH_API_URL,
        # max_rows=5,  # for quick tests
    )
    print(f"\n✅ TTS 생성 완료: {tts_csv_with_paths}\n")

    # Rows with a wav path count as successes, rows without as failures.
    import pandas as pd
    tts_df = pd.read_csv(tts_csv_with_paths)
    wav_paths = tts_df['tts_wav_path']
    print(
        f"📊 TTS 통계:",
        f"  - 총 생성: {len(tts_df)}개",
        f"  - 성공: {wav_paths.notna().sum()}개",
        f"  - 실패: {wav_paths.isna().sum()}개\n",
        sep="\n",
    )

except Exception as exc:
    # Report (with a hint about the server), dump the traceback, re-raise.
    print(f"\n❌ TTS 생성 실패: {exc}")
    print(f"  Fish-Speech 서버가 실행 중인지 확인하세요!")
    traceback.print_exc()
    raise
# Takes about 5 minutes.

[TTS] Fish-Speech TTS 시작
  API URL: http://127.0.0.1:8080/v1/tts
  입력 CSV: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_phrases.from_dialogue.csv
  캐스터 참조: [PosixPath('/workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_refs/caster_prompt_1.wav')]
  해설 참조: [PosixPath('/workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_refs/analyst_pakchanho_prompt_1.wav')]
[TTS_API] 입력 CSV: /workspace/skn17_final_runpod_code/baseball_pipeline/data/llm_outputs/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_phrases.from_dialogue.csv
[TTS_API] video_stem: SSG_삼성_10_14_2025_준플레이오프_4차전
[TTS_API] 출력 디렉토리: /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_audio/SSG_삼성_10_14_2025_준플레이오프_4차전
[TTS_API] loaded caster ref: /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_refs/caster_prompt_1.wav
[TTS_API] loaded analyst ref: /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_refs/analyst_pakchanho_prompt_1.wav
[TTS

In [17]:
# ==========================================
# Cell 10: Time-alignment preprocessing
# ==========================================

import traceback

from src.llm_preprocess_align import preprocess_and_align_llm_csv

print(f"[ALIGN] 시간 정렬 전처리 시작", f"  입력: {tts_csv_with_paths}", sep="\n")

# Column names of the input CSV.
align_columns = dict(
    start_col="start_sec",
    end_col="end_sec",
    role_col="role",
    uttid_col="utterance_id",
)

# Tuning values forwarded unchanged to the alignment routine.
align_tuning = dict(
    min_text_chars=2,
    merge_same_role=True,
    merge_gap_thresh_sec=0.25,
    merge_short_thresh_sec=1.0,
    min_gap_sec=0.02,
    caster_extra_ratio=0.2,
    analyst_extra_ratio=2.0,
    max_analyst_expand_sec=7.0,
    analyst_priority_min_overlap_sec=0.5,
)

try:
    aligned_csv = preprocess_and_align_llm_csv(
        llm_csv_path=tts_csv_with_paths,
        out_csv_path=None,  # a ".pre_aligned.csv" path is generated automatically
        **align_columns,
        **align_tuning,
    )
except Exception as exc:
    # Report, dump the traceback, and re-raise to halt the notebook.
    print(f"\n❌ 시간 정렬 실패: {exc}")
    traceback.print_exc()
    raise
else:
    print(f"\n✅ 시간 정렬 완료: {aligned_csv}\n")


[ALIGN] 시간 정렬 전처리 시작
  입력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_audio/SSG_삼성_10_14_2025_준플레이오프_4차전/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_phrases.with_tts_api.csv
[LLM_PRE_ALIGN] very short/empty rows removed: 2
[LLM_PRE_ALIGN] merged rows count: 111 -> 108
[LLM_PRE_ALIGN] analyst priority drop casters: 108 -> 103
[LLM_PRE_ALIGN] role=caster   orig=(7.000~17.222, dur=10.222) -> aligned=(7.000~17.202, dur=10.202)
[LLM_PRE_ALIGN] role=analyst  orig=(17.222~29.710, dur=12.488) -> aligned=(17.222~36.710, dur=19.488)
[LLM_PRE_ALIGN] role=caster   orig=(55.909~59.687, dur=3.778) -> aligned=(55.909~59.667, dur=3.758)
[LLM_PRE_ALIGN] role=analyst  orig=(59.687~68.414, dur=8.727) -> aligned=(59.687~75.414, dur=15.727)
[LLM_PRE_ALIGN] role=caster   orig=(75.970~81.748, dur=5.778) -> aligned=(75.970~82.903, dur=6.933)
[LLM_PRE_ALIGN] role=caster   orig=(83.550~97.106, dur=13.556) -> aligned=(83.550~98.330, dur=14.780)
[LLM_PRE_ALIGN] role=caster   orig=(98.350~104.350, dur=6.00

In [18]:
# ==========================================
# Cell 11: WSOLA sync alignment (improved version)
# ==========================================

import traceback

from src.tts_align_wsola_gap import build_wsola_tts_timeline_gap

# Per-video TTS directory and the single timeline wav written into it.
tts_audio_out_dir = TTS_AUDIO_DIR / video_stem
tts_timeline_wav = tts_audio_out_dir / f"{video_stem}.tts_timeline.wav"

print(
    f"[WSOLA] TTS 음성 싱크 맞춤 시작 (Soft Speedup)",
    f"  TTS 디렉토리: {tts_audio_out_dir}",
    f"  출력: {tts_timeline_wav}",
    sep="\n",
)

wsola_request = dict(
    llm_csv_path=aligned_csv,
    tts_audio_dir=tts_audio_out_dir,
    out_wav_path=tts_timeline_wav,
    start_col="start_sec",
    end_col="end_sec",
    role_col="role",
    uttid_col="utterance_id",
    min_gap_ms=60,
    tail_margin_ms=80,
    # NOTE(review): the original comments also mention "soft" limits of 1.4x
    # (caster) and 1.5x (analyst); those are not set in this cell — confirm
    # where they apply.
    caster_max_speedup=1.3,   # at most 1.3x
    analyst_max_speedup=1.8,  # at most 1.8x
)

try:
    final_tts_wav = build_wsola_tts_timeline_gap(**wsola_request)
except Exception as exc:
    # Report, dump the traceback, and re-raise to halt the notebook.
    print(f"\n❌ WSOLA 싱크 실패: {exc}")
    traceback.print_exc()
    raise
else:
    print(f"\n✅ WSOLA 싱크 맞춤 완료: {final_tts_wav}\n")

[WSOLA] TTS 음성 싱크 맞춤 시작 (Soft Speedup)
  TTS 디렉토리: /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_audio/SSG_삼성_10_14_2025_준플레이오프_4차전
  출력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_audio/SSG_삼성_10_14_2025_준플레이오프_4차전/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_timeline.wav
[WSOLA_GAP] last_end=956.786s, total≈956.926s
[WSOLA_GAP] samplerate=44100, channels=1
[WSOLA_GAP] utt=0_caster role=caster   slot=(  7.000~ 17.202) logical=(  7.000~ 17.122) orig= 6.269s -> target=10.122s rate=1.000
[WSOLA_GAP] utt=0_analyst role=analyst  slot=( 17.222~ 36.710) logical=( 17.222~ 36.630) orig=27.910s -> target=19.408s rate=0.695
[WSOLA_GAP] utt=2_caster role=caster   slot=( 55.909~ 59.667) logical=( 55.909~ 59.587) orig= 2.415s -> target= 3.678s rate=1.000
[WSOLA_GAP] utt=2_analyst role=analyst  slot=( 59.687~ 75.414) logical=( 59.687~ 75.334) orig=33.855s -> target=15.647s rate=0.556
[WSOLA_GAP] utt=3_caster role=caster   slot=( 75.970~ 82.903) logical=( 75.970~ 82.823) orig= 

In [19]:
# ==========================================
# Cell 12: 최종 영상 인코딩
# ==========================================

import subprocess

def merge_audio_and_encode_video(
    original_video_path: Path,
    tts_vocals_path: Path,
    bg_no_vocals_path: Path,
    output_video_path: Path,
    tts_volume: float = 1.0,
    bg_volume: float = 0.3,
) -> Path:
    """Mix the TTS track with the background track and mux it onto the video.

    One ffmpeg invocation does both steps:
      1) scale each audio input by its volume and mix the two (``amix``);
      2) combine the mix with the first video stream of the original file,
         copying the video stream and encoding the audio as AAC.

    Args:
        original_video_path: Video whose first video stream is kept.
        tts_vocals_path: Audio file used as the first mix input.
        bg_no_vocals_path: Audio file used as the second mix input.
        output_video_path: Destination file; its parent directory is created.
        tts_volume: Factor passed to the ``volume`` filter for the TTS input.
        bg_volume: Factor passed to the ``volume`` filter for the background.

    Returns:
        ``output_video_path``.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
    """
    output_video_path.parent.mkdir(parents=True, exist_ok=True)

    # Inputs are indexed in the order given below: 0 = video, 1 = TTS, 2 = bg.
    filter_graph = f"[1:a]volume={tts_volume}[a1];[2:a]volume={bg_volume}[a2];[a1][a2]amix=inputs=2:duration=first[amix]"

    cmd = ["ffmpeg", "-y"]
    for media_input in (original_video_path, tts_vocals_path, bg_no_vocals_path):
        cmd += ["-i", str(media_input)]
    cmd += ["-filter_complex", filter_graph]
    cmd += ["-map", "0:v:0", "-map", "[amix]"]
    cmd += ["-c:v", "copy", "-c:a", "aac"]
    cmd += ["-shortest", str(output_video_path)]

    print("[ENCODING] 최종 영상 인코딩 중...")
    print(" ".join(cmd))
    subprocess.run(cmd, check=True)

    print(f"[ENCODING] 완료: {output_video_path}")
    return output_video_path


final_video_path = OUTPUT_VIDEO_DIR / f"{video_stem}.final.mp4"

print(
    f"[ENCODING] 최종 영상 생성 시작",
    f"  원본 비디오: {local_video_path}",
    f"  TTS 음성: {final_tts_wav}",
    f"  배경음: {no_vocals_path}",
    f"  출력: {final_video_path}",
    sep="\n",
)

try:
    # Mix the aligned TTS track with the background stem and mux the result
    # onto the original video.
    final_video = merge_audio_and_encode_video(
        original_video_path=local_video_path,
        tts_vocals_path=final_tts_wav,
        bg_no_vocals_path=no_vocals_path,
        output_video_path=final_video_path,
        tts_volume=1.0,
        bg_volume=0.3,
    )

    # Closing banner with the output location and its size in MiB.
    print(
        f"\n{'='*80}",
        "🎉 전체 파이프라인 완료!",
        f"{'='*80}",
        f"최종 영상: {final_video}",
        f"파일 크기: {final_video.stat().st_size / (1024**2):.2f} MB",
        f"{'='*80}\n",
        sep="\n",
    )

except Exception as exc:
    # Report, dump the traceback, and re-raise to halt the notebook.
    print(f"\n❌ 최종 인코딩 실패: {exc}")
    import traceback
    traceback.print_exc()
    raise


# ==========================================
# Cell 13: End-to-end data-flow verification
# ==========================================

print("\n" + "="*80)
print("📊 전체 데이터 흐름 검증")
print("="*80)

# (label, path) for each artifact produced by the earlier cells,
# listed in pipeline order.
files_to_check = [
    ("원본 영상", local_video_path),
    ("Demucs vocals", vocals_path),
    ("Demucs no_vocals", no_vocals_path),
    ("STT raw JSON", raw_json_path),
    ("STT utterances", utter_csv),
    ("VLM 스코어보드", vlm_csv),
    ("Dialogue", dialogue_csv),
    ("TTS 형식 변환", tts_format_csv),
    ("TTS 생성 결과", tts_csv_with_paths),
    ("시간 정렬", aligned_csv),
    ("WSOLA 최종 음성", final_tts_wav),
    ("최종 영상", final_video),
]

all_exist = True

for name, path in files_to_check:
    exists = path.exists()
    status = "✅" if exists else "❌"
    print(f"{status} {name:20s}: {path}")

    if not exists:
        # Remember that something is missing, but keep listing the rest.
        all_exist = False
        continue

    # The size line is printed for regular files only.
    if path.is_file():
        size_mb = path.stat().st_size / (1024**2)
        print(f"   크기: {size_mb:.2f} MB")

print("="*80)

print(
    "✅ 모든 파일이 정상적으로 생성되었습니다!"
    if all_exist
    else "❌ 일부 파일이 누락되었습니다. 위 목록을 확인하세요."
)

print("="*80 + "\n")


[ENCODING] 최종 영상 생성 시작
  원본 비디오: /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4
  TTS 음성: /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_audio/SSG_삼성_10_14_2025_준플레이오프_4차전/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_timeline.wav
  배경음: /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs/htdemucs/SSG_삼성_10_14_2025_준플레이오프_4차전/no_vocals.wav
  출력: /workspace/skn17_final_runpod_code/baseball_pipeline/data/output_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.final.mp4
[ENCODING] 최종 영상 인코딩 중...
ffmpeg -y -i /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4 -i /workspace/skn17_final_runpod_code/baseball_pipeline/data/tts_audio/SSG_삼성_10_14_2025_준플레이오프_4차전/SSG_삼성_10_14_2025_준플레이오프_4차전.tts_timeline.wav -i /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs/htdemucs/SSG_삼성_10_14_2025_준플레이오프_4차전/no_vocals.wav -filter_complex [1:a]volume=1.0[a1];[2:a]vol

ffmpeg version 6.1.1-3ubuntu5 Copyright (c) 2000-2023 the FFmpeg developers
  built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
  configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --ena

[ENCODING] 완료: /workspace/skn17_final_runpod_code/baseball_pipeline/data/output_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.final.mp4

🎉 전체 파이프라인 완료!
최종 영상: /workspace/skn17_final_runpod_code/baseball_pipeline/data/output_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.final.mp4
파일 크기: 464.52 MB


📊 전체 데이터 흐름 검증
✅ 원본 영상               : /workspace/skn17_final_runpod_code/baseball_pipeline/data/input_videos/SSG_삼성_10_14_2025_준플레이오프_4차전.mp4
   크기: 471.06 MB
✅ Demucs vocals       : /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs/htdemucs/SSG_삼성_10_14_2025_준플레이오프_4차전/vocals.wav
   크기: 158.54 MB
✅ Demucs no_vocals    : /workspace/skn17_final_runpod_code/baseball_pipeline/data/demucs/outputs/htdemucs/SSG_삼성_10_14_2025_준플레이오프_4차전/no_vocals.wav
   크기: 158.54 MB
✅ STT raw JSON        : /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_raw/vocals.clova_raw.json
   크기: 0.18 MB
✅ STT utterances      : /workspace/skn17_final_runpod_code/baseball_pipeline/data/stt_segments/SSG_삼성_10_

[out#0/mp4 @ 0x63f4643e2700] video:466198kB audio:8022kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.304856%
size=  475665kB time=00:15:42.42 bitrate=4134.7kbits/s speed=40.1x    
[aac @ 0x63f4643e4240] Qavg: 202.753
