<a href="https://colab.research.google.com/github/udaycharanrada/2d-to-pixar/blob/main/2D_to_Pixar_clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U diffusers==0.26.3 huggingface_hub==0.25.2 transformers accelerate safetensors torch torchvision pillow ipywidgets


In [None]:
from diffusers import StableDiffusionXLImg2ImgPipeline
import torch, random, os
from PIL import Image
import numpy as np, cv2
from IPython.display import display
import ipywidgets as widgets

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on CUDA. On the CPU fallback the weights
# are loaded as float32, since float16 inference is poorly supported there.
pipe_dtype = torch.float16 if device == "cuda" else torch.float32

# Load the SDXL base checkpoint as an image-to-image pipeline.
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=pipe_dtype,
    use_safetensors=True,
).to(device)

# Memory-saving options: sliced attention and tiled VAE processing.
pipe.enable_attention_slicing()
pipe.enable_vae_tiling()

# Positive prompt: one phrase per entry, joined into a single
# comma-separated string.
prompt = ", ".join((
    "Pixar-style 3D render of a cartoon boy",
    "perfect natural hands with five fingers",
    "correct anatomy",
    "detailed eyes",
    "realistic proportions",
    "cinematic lighting",
    "smooth face",
    "Pixar/Disney movie style",
    "ultra-detailed",
    "soft shadows",
    "professional 3D shading",
))

# Negative prompt: artefacts the model is asked to steer away from,
# joined the same way.
negative_prompt = ", ".join((
    "extra fingers",
    "missing fingers",
    "deformed hands",
    "bad anatomy",
    "distorted face",
    "mutated limbs",
    "low-res",
    "blurry",
    "watermark",
    "text",
    "creepy eyes",
    "doll-like plastic texture",
))

# ---- Input image (local path) ----
# Paths pasted from a shell or file manager are often wrapped in quotes and
# may start with "~", so both are normalised before the file is opened.
image_path = input("Enter the path of your input image: ").strip().strip("'\"")
image_path = os.path.expanduser(image_path)
if not os.path.isfile(image_path):
    # Fail early with the resolved path in the message.
    raise FileNotFoundError(f"Input image not found: {image_path!r}")
init_image = Image.open(image_path).convert("RGB")

# Generate the variations. Each one is produced by two img2img passes:
# a strong stylisation pass over the input image, followed by a lighter
# pass over the result of the first.
n_variations = 5
output_dir = "pixar_safe_variations"
os.makedirs(output_dir, exist_ok=True)
files_list = []

for i in range(n_variations):
    print(f"üé® Generating Pixar render {i+1}/{n_variations} (safe mode)...")

    # One random seed per variation, used for both passes and printed so
    # that a specific result can be regenerated later.
    seed = random.randint(0, 999999)
    generator = torch.Generator(device).manual_seed(seed)
    print(f"   seed: {seed}")

    # Pass 1: strong restyle of the input image.
    image = pipe(
        prompt=prompt,
        image=init_image,
        strength=0.65,
        guidance_scale=12,
        num_inference_steps=90,
        negative_prompt=negative_prompt,
        generator=generator,
    ).images[0]

    # Pass 2: lighter pass over the first result, drawing from the same
    # seeded generator so the whole variation is reproducible.
    image = pipe(
        prompt=prompt,
        image=image,
        strength=0.3,
        guidance_scale=9,
        num_inference_steps=40,
        negative_prompt=negative_prompt,
        generator=generator,
    ).images[0]

    fname = f"{output_dir}/pixar_safe_{i+1}.png"
    image.save(fname)
    files_list.append(fname)

print(f"All {n_variations} Pixar images generated successfully!")

# Scoring
def score_image(img):
    """Return a heuristic quality score for ``img``.

    ``img`` must provide ``convert("RGB")`` (it is called with PIL images
    in this notebook). The score is ``0.6 * sharpness + 0.4 * contrast``,
    where sharpness is the variance of the Laplacian of the grayscale
    image and contrast is the standard deviation over all RGB values.
    """
    rgb = np.array(img.convert("RGB"))
    laplacian = cv2.Laplacian(cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY), cv2.CV_64F)
    sharpness = laplacian.var()
    contrast = rgb.std()
    return 0.6 * sharpness + 0.4 * contrast

# Score every saved render, in generation order, then pick the highest.
scores = list(map(score_image, map(Image.open, files_list)))
best_idx = int(np.argmax(scores))

best_file = files_list[best_idx]
best_score = scores[best_idx]
print(f"Best image: {best_file} (score {best_score:.2f})")


In [None]:
# ============================================================
# 🎞 2D → 3D Parallax Motion Generator (Fixed Image Type Error)
# ============================================================

!pip install torch torchvision opencv-python moviepy --quiet

import torch, cv2, numpy as np
from PIL import Image
from moviepy.editor import ImageSequenceClip
from google.colab import files
import torchvision.transforms as T
import os

# ----------------------------------------
# 1. Upload your 2D image
# ----------------------------------------
uploaded = files.upload()
if not uploaded:
    # Nothing came back from the upload dialog (presumably it was
    # cancelled); stop with a clear message instead of a bare StopIteration
    # from next() below.
    raise ValueError("No image was uploaded.")

# Only the first uploaded file is used.
filename = next(iter(uploaded))
img = Image.open(filename).convert("RGB")
img_np = np.array(img)
H, W = img_np.shape[:2]
print(f"‚úÖ Image loaded: {filename}, size = {W}x{H}")

# ----------------------------------------
# 2. Load MiDaS depth-estimation model (Torch Hub)
# ----------------------------------------
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fetch the DPT_Large model from Torch Hub, move it to the selected device
# and put it in evaluation mode.
midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")
midas = midas.to(device)
midas = midas.eval()

# The hub's "transforms" entry point exposes the preprocessing pipelines;
# the DPT one is used with this model.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = midas_transforms.dpt_transform

# ----------------------------------------
# 3. Run depth prediction
# ----------------------------------------
# The MiDaS hub transforms take a NumPy array in RGB channel order (the
# official example converts OpenCV's BGR image to RGB before calling them).
# `img` is already RGB, so it is converted to an array without reordering
# the channels.
img_rgb = np.array(img)
input_batch = transform(img_rgb).to(device)

with torch.no_grad():
    prediction = midas(input_batch)
    # Resize the prediction back to the source resolution (H, W).
    prediction = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=(H, W),
        mode="bicubic",
        align_corners=False,
    ).squeeze()

depth = prediction.cpu().numpy()
# Min-max scale to [0, 1]; the epsilon avoids a division by zero when the
# predicted map is constant.
depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
print("‚úÖ Depth map computed successfully")

# ----------------------------------------
# 4. Generate parallax motion frames
# ----------------------------------------
n_frames = 75
motion_scale = 0.05
frames = []

# The base pixel grid is identical for every frame, so it is built once.
grid_x, grid_y = np.meshgrid(
    np.arange(W, dtype=np.float32),
    np.arange(H, dtype=np.float32),
)

for i in range(n_frames):
    # t sweeps 0 -> 1 over the clip; max() guards the single-frame case.
    t = i / max(n_frames - 1, 1)

    # Maximum displacement for this frame, in pixels. The values are kept
    # fractional: cv2.remap interpolates between pixels, and truncating to
    # int would quantise the motion into visible steps.
    shift_x = (t - 0.5) * motion_scale * W
    shift_y = (0.5 - t) * motion_scale * H

    # Each pixel is displaced in proportion to its normalised depth value.
    flow_x = depth_norm * shift_x
    flow_y = depth_norm * shift_y
    map_x = (grid_x + flow_x).astype(np.float32)
    map_y = (grid_y + flow_y).astype(np.float32)

    warped = cv2.remap(
        img_np, map_x, map_y,
        interpolation=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REFLECT
    )
    # img_np is RGB and MoviePy expects RGB frames, so the warped image is
    # stored as-is; converting to BGR here would swap red and blue in the
    # exported video.
    frames.append(warped)

print("‚úÖ Generated parallax frames successfully")

# ----------------------------------------
# 5. Export as 5-second MP4 video
# ----------------------------------------
video_path = "parallax_3d_motion.mp4"

# Frame rate chosen so that all n_frames play back in about five seconds.
fps = int(n_frames / 5)

clip = ImageSequenceClip(frames, fps=fps)
clip.write_videofile(
    video_path,
    codec="libx264",
    audio=False,
)

# Offer the encoded file for download from the Colab runtime.
files.download(video_path)
print("üé• 3D parallax video ready:", video_path)
