# Text-to-Media: Ultimate Video Generation (Colab, Local Upload)

This Colab sets up and runs the full video pipeline using a project zip you upload from your local machine.

Pipeline: Stable Diffusion → AnimateDiff → StableSR → RealESRGAN → FILM → FFmpeg

Instructions:
1) Zip your local `text-to-media-app` folder.
2) Run the upload cell below and select the zip.
3) Run the install and model download cells.
4) Run the test generation cell and download the MP4.

Notes:
- Use a Colab GPU runtime (T4 or A100); running on CPU is not recommended.
- Downloads are large (10–20+ GB). Make sure the runtime has enough session time and disk space.
- Outputs saved to `/content/text-to-media-app/outputs/videos`.


In [None]:
# Check GPU: show the nvidia-smi report, or print a fallback notice when the
# command is missing or exits non-zero.
nvidia-smi || echo "No NVIDIA GPU detected (CPU runtime)."


In [None]:
# Upload your zipped local project (text-to-media-app.zip)
from google.colab import files
import os, shutil, zipfile
from pathlib import Path

upload = files.upload()  # Select your project zip
zip_name = next(iter(upload.keys()))
print('Uploaded:', zip_name)

# Prepare workspace
root = Path('/content')
workdir = root / 'text-to-media-app'
if workdir.exists():
    shutil.rmtree(workdir)
workdir.mkdir(parents=True, exist_ok=True)

# Extract zip into /content
with zipfile.ZipFile(zip_name, 'r') as z:
    z.extractall(root)

# If the zip contains a top-level folder, ensure path normalization
if not workdir.exists():
    # Try to find extracted folder
    cands = [p for p in root.iterdir() if p.is_dir() and p.name.startswith('text-to-media-app')]
    if cands:
        cands[0].rename(workdir)

print('Workspace ready at:', workdir)
!ls -la /content/text-to-media-app


In [None]:
# System deps: install ffmpeg and git-lfs from apt
apt-get update -y && apt-get install -y ffmpeg git-lfs

# Set up Git LFS for the current user
git lfs install

# Ensure we are in the uploaded workspace
# NOTE(review): if this cell runs in its own shell (e.g. under %%bash), this
# cd does not carry over to later cells — confirm before relying on it.
cd /content/text-to-media-app
pwd && ls -la


In [None]:
# Python deps
# Start from the project root so the relative requirements path below resolves
# regardless of which directory this cell's shell starts in.
cd /content/text-to-media-app

python -m pip install -U pip setuptools wheel
# The extra index makes the PyTorch CUDA 12.1 wheel repository available to pip.
python -m pip install --extra-index-url https://download.pytorch.org/whl/cu121 -r backend/requirements.txt

# Reduce TF logs
# NOTE(review): an export only affects this shell and the processes it starts;
# it does not change the environment of the notebook's Python kernel.
export TF_CPP_MIN_LOG_LEVEL=2


In [None]:
# Download models (priority first)
# Each command ends in `|| true`, so a non-zero exit from the script does not
# fail the cell.
cd /content/text-to-media-app/scripts
python download-models.py --priority || true
# presumably prints the models the script knows about / has fetched — verify
# against download-models.py
python download-models.py --list || true


In [None]:
# Pre-flight check: make sure every model asset the pipeline expects is on
# disk, and stop the notebook with a readable report if anything is missing.
from pathlib import Path
import sys, os

# NOTE(review): assets are looked up under /content/models, while the download
# script is launched from /content/text-to-media-app/scripts — confirm where
# download-models.py actually writes its files.
ROOT = Path('/content')
MODELS = ROOT / 'models'
SD_DIR = MODELS / 'image' / 'stable-diffusion'
AD_DIR = MODELS / 'video' / 'animatediff'
AD_ADAPTER_DIR = AD_DIR / 'motion_adapter'
SR_DIR = MODELS / 'upscaling' / 'stablesr'
REALESRGAN_DIR = MODELS / 'upscaling' / 'realesrgan'
FILM_DIR = MODELS / 'interpolation' / 'film'

# Collected problem descriptions, in the order the checks run.
errors = []

# Stable Diffusion: the expected sub-folders, plus at least one weight file
# anywhere below the model directory.
if SD_DIR.exists():
    errors.extend(
        f'Stable Diffusion missing dir: {sub}'
        for sub in ('text_encoder', 'unet', 'vae', 'scheduler', 'tokenizer')
        if not (SD_DIR / sub).exists()
    )
    sd_weight_found = False
    for pattern in ('*.safetensors', '*.bin', '*.ckpt'):
        if any(SD_DIR.rglob(pattern)):
            sd_weight_found = True
            break
    if not sd_weight_found:
        errors.append('Stable Diffusion weights not found')
else:
    errors.append('Stable Diffusion directory missing')

# AnimateDiff: the repo folder and the motion adapter are reported separately.
if not AD_DIR.exists():
    errors.append('AnimateDiff repo missing')
if AD_ADAPTER_DIR.exists():
    adapter_weight_found = False
    for pattern in ('*.safetensors', '*.bin'):
        if any(AD_ADAPTER_DIR.rglob(pattern)):
            adapter_weight_found = True
            break
    if not adapter_weight_found:
        errors.append('AnimateDiff motion adapter weights missing')
else:
    errors.append('AnimateDiff motion_adapter missing')

# StableSR assets (x4 upscaler snapshot used as dependency assets): only the
# directory's presence is checked.
if not SR_DIR.exists():
    errors.append('StableSR upscaler assets missing')

# RealESRGAN: both checkpoint files must exist and be non-empty.
for weight_name in ('RealESRGAN_x4plus.pth', 'RealESRGAN_x4plus_anime_6B.pth'):
    weight_path = REALESRGAN_DIR / weight_name
    usable = weight_path.exists() and weight_path.stat().st_size > 0
    if not usable:
        errors.append(f'RealESRGAN weight missing: {weight_path.name}')

# FILM repo presence (weights come from TF Hub at runtime)
if not FILM_DIR.exists():
    errors.append('FILM repository not found')

# Report the outcome; any problem aborts the cell via SystemExit.
if not errors:
    print('✅ All required model files verified.')
else:
    print('❌ Model verification failed:')
    for message in errors:
        print(' -', message)
    raise SystemExit('Please re-run the download cell; required model files are missing.')

# Run a short end-to-end generation with the Ultimate pipeline
import os, sys, asyncio
from pathlib import Path

%cd /content/text-to-media-app
sys.path.append('/content/text-to-media-app/backend')

from backend.utils.gpu_detector import GPUDetector
from backend.models.video_generator import VideoGenerator

info = GPUDetector().gpu_info
print('GPU info:', info)

async def run():
    gen = VideoGenerator(info)
    ok = await gen.load_model('ultimate-pipeline')
    if not ok:
        raise RuntimeError('Failed to load ultimate pipeline')

    prompts = [
        'A cinematic cyberpunk city at night, neon lights, rain reflections, flying cars, holograms, highly detailed',
        'A serene forest with fireflies at dusk, volumetric lighting, ultra-detailed, macro style',
        'An astronaut surfing waves on an alien ocean under two moons, dramatic lighting, 4k detail'
    ]

    outputs = []
    for p in prompts:
        out_path = await gen.generate(
            prompt=p,
            model_name='ultimate-pipeline',
            duration=3,
            output_format='mp4',
            preset='ultra-fast',
            fps=8,
            num_inference_steps=20,
            motion_scale=1.2,
        )
        print('Generated:', out_path)
        outputs.append(out_path)
    return outputs

outs = asyncio.run(run())
print('Outputs:', outs)

# Confirm files exist
for o in outs:
    assert Path(o).exists(), f'Output not found: {o}'


In [None]:
# Offer the generated MP4s to the browser for download
from google.colab import files
import glob

videos = glob.glob('/content/text-to-media-app/outputs/videos/*.mp4')
videos.sort()
print('Found videos:', videos[-5:])

# Only the last three entries (by sorted file name) are offered. A failed
# download is reported and the loop moves on to the next file.
for video_path in videos[-3:]:
    try:
        files.download(video_path)
    except Exception as err:
        print('Download failed for', video_path, err)


# Text-to-Media: Ultimate Video Generation (Google Colab)

This Colab sets up the full video pipeline:
- Stable Diffusion → AnimateDiff → StableSR → RealESRGAN → FILM → FFmpeg
- Downloads all required models into the runtime
- Runs a short end-to-end generation and provides a downloadable MP4

Notes:
- Colab T4/A100 GPUs are supported. CPU is not recommended.
- Total downloads can be large (10–20+ GB). Ensure you have session time and storage.
- Output is saved under `/content/text-to-media-app/outputs/videos`.


In [None]:
# Check GPU: show the nvidia-smi report, or print a fallback notice when the
# command is missing or exits non-zero.
nvidia-smi || echo "No NVIDIA GPU detected (CPU runtime)."


In [None]:
# System deps: install ffmpeg and git-lfs from apt
apt-get update -y && apt-get install -y ffmpeg git-lfs

# Enable Git LFS
git lfs install

# Clone repo (shallow), skipped when the folder already exists
# NOTE(review): the URL below contains the placeholder "your-org-or-user";
# replace it with the real repository location before running.
cd /content
if [ ! -d "/content/text-to-media-app" ]; then
  git clone --depth 1 https://github.com/your-org-or-user/text-to-media-app.git
fi
cd /content/text-to-media-app

# Optional: pull latest (a failed pull is ignored via `|| true`)
git pull || true


In [None]:
# Python deps (Colab GPU CUDA 12.1)
# Start from the project root so the relative requirements path below resolves
# regardless of which directory this cell's shell starts in.
cd /content/text-to-media-app

python -m pip install -U pip setuptools wheel
# Prefer PyTorch CUDA wheels if available in this runtime
python -m pip install --extra-index-url https://download.pytorch.org/whl/cu121 -r backend/requirements.txt

# Reduce TF logs
# NOTE(review): an export only affects this shell and the processes it starts;
# it does not change the environment of the notebook's Python kernel.
export TF_CPP_MIN_LOG_LEVEL=2


In [None]:
# Download models (priority first to save time)
# Each command ends in `|| true`, so a non-zero exit from the script does not
# fail the cell.
cd /content/text-to-media-app/scripts
python download-models.py --priority || true
# Verify what we have
python download-models.py --list || true


In [None]:
# Run a short end-to-end generation with the Ultimate pipeline
import os, sys, asyncio, json, glob
from pathlib import Path

%cd /content/text-to-media-app
sys.path.append('/content/text-to-media-app/backend')

from backend.utils.gpu_detector import GPUDetector
from backend.models.video_generator import VideoGenerator

# Detect GPU/device
info = GPUDetector().gpu_info
print('GPU info:', info)

async def run():
    gen = VideoGenerator(info)
    # Load the ultimate pipeline
    ok = await gen.load_model('ultimate-pipeline')
    if not ok:
        raise RuntimeError('Failed to load ultimate pipeline')

    prompt = 'A cinematic cyberpunk city at night, neon lights, rain reflections, flying cars, holograms, highly detailed, dramatic lighting'
    # Keep duration short (<= 3-4s) for Colab memory/time
    out_path = await gen.generate(
        prompt=prompt,
        model_name='ultimate-pipeline',
        duration=3,
        output_format='mp4',
        preset='ultra-fast',
        fps=8,
        num_inference_steps=20,
        motion_scale=1.2,
    )
    return out_path

out = asyncio.run(run())
print('Output:', out)

# Confirm file exists
assert Path(out).exists(), 'Output video not found'


In [None]:
# Send the last MP4 (by sorted file name) in the output folder to the browser
from google.colab import files
from pathlib import Path
import glob

candidates = glob.glob('/content/text-to-media-app/outputs/videos/*.mp4')
candidates.sort()
print('Found videos:', candidates[-3:])

# Nothing is downloaded when the output folder holds no MP4 files.
if len(candidates) > 0:
    files.download(candidates[-1])
