# 🔮 Glimpse3D - gsplat Gaussian Optimization

**Optimize Gaussian Splats using multi-view images**

This notebook refines the initial Gaussian point cloud using multi-view supervision.

## Pipeline Role
```
TripoSR → [This Notebook] → Optimized Splats → Rendering → Enhancement
```

## Requirements
- Google Colab with **T4 GPU** (15GB VRAM)
- Input: Gaussian PLY from TripoSR + Multi-view images from SyncDreamer

---

## 1️⃣ Check GPU & Environment

In [None]:
# Report whether the 'google.colab' module is already loaded in this kernel.
import sys
IN_COLAB = 'google.colab' in sys.modules
print(f"Running in Colab: {IN_COLAB}")

# IPython shell escape: print GPU name plus total/free memory as CSV.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv

# Report the PyTorch version and whether it can see a CUDA device.
import torch
print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Name of the first visible CUDA device (index 0).
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2️⃣ Install gsplat & Dependencies

In [None]:
%%capture
# The %%capture cell magic above must stay on the first line of the cell.
# Install gsplat (CUDA-accelerated Gaussian Splatting)
!pip install gsplat --quiet

# Additional dependencies
!pip install plyfile numpy pillow matplotlib tqdm --quiet
!pip install torch torchvision --quiet

# NOTE(review): this cell runs under %%capture, so the message below is
# presumably captured together with the pip output and never shown — confirm.
# NOTE(review): a later cell imports `imageio`, which is not installed here;
# presumably it is preinstalled on Colab — confirm.
print("‚úÖ gsplat installed!")

In [None]:
# Verify gsplat installation
# The import raises immediately if the install cell did not succeed.
import gsplat
print(f"‚úÖ gsplat version: {gsplat.__version__}")

## 3️⃣ Load Gaussian PLY

In [None]:
from google.colab import files
import os

# Create working directory
WORK_DIR = "/content/gsplat_work"
os.makedirs(WORK_DIR, exist_ok=True)

# Upload Gaussian PLY
print("üì§ Upload gaussian_splat.ply from TripoSR:")
uploaded = files.upload()
if not uploaded:
    # A cancelled dialog yields an empty mapping; fail with a clear message
    # instead of an IndexError on the first-key lookup.
    raise RuntimeError(
        "No file was uploaded; re-run this cell and select the Gaussian PLY."
    )

# Only the first uploaded file is used as the Gaussian PLY.
_ply_name, _ply_bytes = next(iter(uploaded.items()))
# basename() keeps the copy inside WORK_DIR even if the name carries a path.
PLY_PATH = os.path.join(WORK_DIR, os.path.basename(_ply_name))
with open(PLY_PATH, 'wb') as f:
    f.write(_ply_bytes)
print(f"‚úÖ Loaded: {PLY_PATH}")

In [None]:
import numpy as np
from plyfile import PlyData
import torch

def load_gaussian_ply(path):
    """Load a Gaussian Splat PLY file into float32 tensors.

    Reads the ``vertex`` element of the PLY at ``path`` and groups its
    per-point fields.

    Args:
        path: Path to the PLY file.

    Returns:
        Dict of ``torch.float32`` tensors:
            ``xyz``       -- (N, 3) from fields ``x``, ``y``, ``z``
            ``f_dc``      -- (N, 3) from ``f_dc_0..2``
            ``f_rest``    -- (N, K) from every ``f_rest_<i>`` field present,
                             ordered by ``i``; (N, 0) when the file has none
            ``opacity``   -- the ``opacity`` field as stored
            ``scales``    -- (N, 3) from ``scale_0..2``
            ``rotations`` -- (N, 4) from ``rot_0..3``

    Raises:
        ValueError / KeyError: if a required field is missing (the exact type
            comes from plyfile / numpy field lookup).
    """
    vertex = PlyData.read(path)['vertex']
    available = vertex.data.dtype.names

    def stack_fields(names):
        # Column-stack the named per-vertex fields into an (N, len(names)) array.
        return np.stack([vertex[name] for name in names], axis=-1)

    xyz = stack_fields(('x', 'y', 'z'))
    f_dc = stack_fields(('f_dc_0', 'f_dc_1', 'f_dc_2'))

    # Higher-order SH coefficients are optional: degree-0 exports carry none.
    # Collect whatever is present, sorted numerically so that f_rest_10 follows
    # f_rest_9 rather than f_rest_1.
    prefix = 'f_rest_'
    rest_names = sorted(
        (name for name in available
         if name.startswith(prefix) and name[len(prefix):].isdigit()),
        key=lambda name: int(name[len(prefix):]),
    )
    if rest_names:
        f_rest = stack_fields(rest_names)
    else:
        # np.stack([]) raises, so build the empty (N, 0) block explicitly.
        f_rest = np.zeros((xyz.shape[0], 0), dtype=np.float32)

    opacity = vertex['opacity']
    scales = stack_fields(('scale_0', 'scale_1', 'scale_2'))
    # Rotations (quaternion), stored as four components rot_0..rot_3.
    rotations = stack_fields(('rot_0', 'rot_1', 'rot_2', 'rot_3'))

    return {
        'xyz': torch.tensor(xyz, dtype=torch.float32),
        'f_dc': torch.tensor(f_dc, dtype=torch.float32),
        'f_rest': torch.tensor(f_rest, dtype=torch.float32),
        'opacity': torch.tensor(opacity, dtype=torch.float32),
        'scales': torch.tensor(scales, dtype=torch.float32),
        'rotations': torch.tensor(rotations, dtype=torch.float32),
    }

# Read the uploaded PLY and print the point count and axis-aligned bounds.
gaussians = load_gaussian_ply(PLY_PATH)
_positions = gaussians['xyz']
print(f"‚úÖ Loaded {len(_positions):,} Gaussians")
print(f"   Bounds: {_positions.min(0).values.numpy()} to {_positions.max(0).values.numpy()}")

## 4️⃣ Upload Multi-View Images (Optional)

If you have multi-view images from SyncDreamer, upload them for optimization.

In [None]:
import zipfile
from PIL import Image
import matplotlib.pyplot as plt

MULTIVIEW_DIR = os.path.join(WORK_DIR, "multiview")
os.makedirs(MULTIVIEW_DIR, exist_ok=True)

# Extensions accepted as multi-view images (matched case-insensitively).
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Only the answer "y" (any case, surrounding whitespace ignored) enables upload.
use_multiview = input("Do you have multi-view images? (y/n): ").strip().lower() == 'y'

if use_multiview:
    print("üì§ Upload multi-view images (ZIP file or individual PNGs):")
    uploaded_mv = files.upload()

    for fname, content in uploaded_mv.items():
        if fname.lower().endswith('.zip'):
            # Write the archive to disk, then unpack it into MULTIVIEW_DIR.
            zip_path = os.path.join(WORK_DIR, fname)
            with open(zip_path, 'wb') as f:
                f.write(content)
            with zipfile.ZipFile(zip_path, 'r') as z:
                z.extractall(MULTIVIEW_DIR)
            print(f"‚úÖ Extracted {fname}")
        else:
            # Save individual image
            img_path = os.path.join(MULTIVIEW_DIR, fname)
            with open(img_path, 'wb') as f:
                f.write(content)

    # List images. Archives made by zipping a folder unpack into a
    # subdirectory, so search recursively and store paths relative to
    # MULTIVIEW_DIR (top-level files keep their bare name).
    image_files = []
    for _root, _dirs, _names in os.walk(MULTIVIEW_DIR):
        # Skip macOS archive metadata folders.
        _dirs[:] = [d for d in _dirs if d != '__MACOSX']
        for _name in _names:
            if _name.startswith('.'):
                continue  # hidden / resource-fork files are not images
            if _name.lower().endswith(_IMAGE_EXTENSIONS):
                image_files.append(
                    os.path.relpath(os.path.join(_root, _name), MULTIVIEW_DIR)
                )
    image_files.sort()
    print(f"\n‚úÖ Found {len(image_files)} multi-view images")
else:
    image_files = []
    print("‚è≠Ô∏è Skipping multi-view optimization (will use synthetic views)")

## 5️⃣ Setup Camera System

In [None]:
import math

def generate_camera_poses(n_views=16, radius=2.0, elevation=30.0):
    """
    Generate camera poses on a circular orbit, all looking at the origin.

    Cameras are evenly spaced in azimuth around the world +z axis at a fixed
    elevation. The result maps world points into camera space, with camera
    axes in the OpenCV convention expected by gsplat's rasterizer: +x right,
    +y down, +z forward (into the scene). The world origin therefore lands at
    ``(0, 0, radius)`` in every camera.

    Args:
        n_views: Number of cameras; ``0`` yields an empty ``(0, 4, 4)`` array.
        radius: Distance of each camera from the origin (must be non-zero,
            otherwise the view direction is undefined).
        elevation: Elevation above the xy-plane, in degrees.

    Returns:
        ``(n_views, 4, 4)`` float64 array of world-to-camera matrices.
    """
    elevation_rad = math.radians(elevation)
    look_at = np.zeros(3)
    world_up = np.array([0.0, 0.0, 1.0])

    poses = []
    for i in range(n_views):
        azimuth = 2 * math.pi * i / n_views

        # Camera position in world coordinates
        cam_pos = radius * np.array([
            math.cos(elevation_rad) * math.cos(azimuth),
            math.cos(elevation_rad) * math.sin(azimuth),
            math.sin(elevation_rad),
        ])

        # Camera basis vectors, expressed in world coordinates
        forward = look_at - cam_pos
        forward = forward / np.linalg.norm(forward)

        right = np.cross(forward, world_up)
        right_norm = np.linalg.norm(right)
        if right_norm < 1e-8:
            # Looking straight along world_up: the cross product vanishes.
            # Use the limit of `right` as the elevation approaches the pole.
            right = np.array([-math.sin(azimuth), math.cos(azimuth), 0.0])
            right_norm = 1.0
        right = right / right_norm

        up_new = np.cross(right, forward)

        # World-to-camera matrix: the ROWS of the rotation are the camera
        # axes in world coordinates (putting them in columns would give the
        # camera-to-world rotation instead).
        w2c = np.eye(4)
        w2c[0, :3] = right
        w2c[1, :3] = -up_new   # +y points down in image space
        w2c[2, :3] = forward   # +z points into the scene
        w2c[:3, 3] = -w2c[:3, :3] @ cam_pos

        poses.append(w2c)

    if not poses:
        # np.stack([]) raises; return a well-shaped empty batch instead.
        return np.empty((0, 4, 4))
    return np.stack(poses)

def get_projection_matrix(fov_deg=60, aspect=1.0, near=0.1, far=100.0):
    """Build an OpenGL-style perspective projection matrix.

    Args:
        fov_deg: Vertical field of view in degrees.
        aspect: Divisor applied to the focal term in the x row.
        near: Near clipping distance.
        far: Far clipping distance.

    Returns:
        4x4 float64 array; entry [1, 1] is ``1 / tan(fov / 2)`` and
        entry [0, 0] is that value divided by ``aspect``.
    """
    focal = 1.0 / math.tan(math.radians(fov_deg) / 2)
    depth_span = near - far

    return np.array(
        [
            [focal / aspect, 0.0, 0.0, 0.0],
            [0.0, focal, 0.0, 0.0],
            [0.0, 0.0, (far + near) / depth_span, 2 * far * near / depth_span],
            [0.0, 0.0, -1.0, 0.0],
        ],
        dtype=np.float64,
    )

# Generate camera system
# N_VIEWS orbit cameras are used for optimization; IMAGE_SIZE is the square
# render resolution in pixels passed to the renderer.
N_VIEWS = 16
IMAGE_SIZE = 512

# Orbit at radius 2.0 and 30 degrees elevation; 60 degree field of view.
camera_poses = generate_camera_poses(n_views=N_VIEWS, radius=2.0, elevation=30.0)
projection = get_projection_matrix(fov_deg=60, aspect=1.0)

print(f"‚úÖ Generated {N_VIEWS} camera poses")

## 6️⃣ Define Gaussian Parameters

In [None]:
import torch
import torch.nn as nn

# Prefer the first CUDA GPU; fall back to the CPU when CUDA is unavailable.
_backend = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)

class GaussianModel(nn.Module):
    """Trainable Gaussian splat parameters with their activations.

    Opacity and scale are stored raw and exposed through sigmoid / exp, so
    the optimizer works on unconstrained values.
    """

    # Zeroth-order spherical-harmonic basis constant used to turn the DC
    # coefficients into RGB.
    _SH_C0 = 0.28209479177387814

    # (attribute name, key in the input dict), in registration order.
    _PARAMETER_SOURCES = (
        ('xyz', 'xyz'),
        ('f_dc', 'f_dc'),
        ('f_rest', 'f_rest'),
        ('opacity_raw', 'opacity'),
        ('scales_raw', 'scales'),
        ('rotations', 'rotations'),
    )

    def __init__(self, gaussians):
        """Clone every tensor in ``gaussians`` into a learnable parameter."""
        super().__init__()
        for attr_name, source_key in self._PARAMETER_SOURCES:
            setattr(self, attr_name, nn.Parameter(gaussians[source_key].clone()))

    @property
    def opacity(self):
        """Sigmoid of the raw opacity parameter."""
        return torch.sigmoid(self.opacity_raw)

    @property
    def scales(self):
        """Exponential of the raw scale parameter."""
        return torch.exp(self.scales_raw)

    def get_colors(self):
        """Get RGB colors from SH DC coefficients."""
        return self._SH_C0 * self.f_dc + 0.5

    def forward(self):
        """Return the activated parameters as a dict ready for rendering."""
        quat_norm = self.rotations.norm(dim=-1, keepdim=True) + 1e-8
        unit_quats = self.rotations / quat_norm
        return {
            'xyz': self.xyz,
            'colors': self.get_colors(),
            'opacity': self.opacity,
            'scales': self.scales,
            'rotations': unit_quats,
        }

# Build the model on the selected device and report its parameter count.
model = GaussianModel(gaussians).to(device)
_param_count = sum(p.numel() for p in model.parameters())
print(f"‚úÖ Model initialized with {_param_count:,} parameters")

## 7️⃣ Render Function using gsplat

In [None]:
from gsplat import rasterization

def render_gaussians(model, w2c, proj, image_size=512):
    """
    Render Gaussian splats from a given camera pose.

    Args:
        model: GaussianModel instance; calling it must return a dict with
            'xyz', 'rotations', 'scales', 'opacity' and 'colors'.
        w2c: World-to-camera matrix (4x4), array-like or tensor.
        proj: Projection matrix (4x4); only the focal terms ``proj[0, 0]``
            and ``proj[1, 1]`` are used, to derive pixel intrinsics.
        image_size: Output image resolution (square, in pixels).

    Returns:
        Tuple ``(rgb, alpha)``: the first (only) camera's entries of the
        color and alpha outputs of ``gsplat.rasterization``.
        NOTE(review): presumably shaped (H, W, 3) and (H, W, 1) — confirm
        against the gsplat version in use.
    """
    params = model()

    # Create camera tensors on the device the Gaussians live on, rather than
    # relying on a module-level global that may disagree with the model.
    target_device = params['xyz'].device

    # as_tensor accepts both numpy arrays and tensors without a warning.
    viewmat = torch.as_tensor(w2c, dtype=torch.float32, device=target_device)

    # Pixel intrinsics: scale the normalized focal terms by half the image
    # size and place the principal point at the image centre.
    half_size = image_size / 2
    K = torch.tensor([
        [float(proj[0, 0]) * half_size, 0.0, half_size],
        [0.0, float(proj[1, 1]) * half_size, half_size],
        [0.0, 0.0, 1.0],
    ], dtype=torch.float32, device=target_device)

    # Render using gsplat; the camera batch dimension has size one.
    render_colors, render_alphas, meta = rasterization(
        means=params['xyz'],
        quats=params['rotations'],
        scales=params['scales'],
        opacities=params['opacity'],
        colors=params['colors'],
        viewmats=viewmat.unsqueeze(0),
        Ks=K.unsqueeze(0),
        width=image_size,
        height=image_size,
        packed=False,
        render_mode="RGB",
    )

    return render_colors[0], render_alphas[0]

# Smoke test: render the first orbit camera without tracking gradients.
with torch.no_grad():
    test_image, test_alpha = render_gaussians(model, camera_poses[0], projection, IMAGE_SIZE)

# Show the color render and its alpha mask side by side.
_panels = (
    ("RGB Render", lambda: test_image.cpu().numpy().clip(0, 1), {}),
    ("Alpha", lambda: test_alpha.cpu().numpy(), {'cmap': 'gray'}),
)

plt.figure(figsize=(12, 5))
for _slot, (_title, _pixels, _imshow_kwargs) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _slot)
    plt.imshow(_pixels(), **_imshow_kwargs)
    plt.title(_title)
    plt.axis('off')
plt.show()

## 8️⃣ Optimization Loop

In [None]:
from tqdm import tqdm
import torch.nn.functional as F

# Hyper-parameters: iteration budget and one base learning rate per attribute.
NUM_ITERATIONS = 1000
LR_XYZ = 1e-4
LR_COLOR = 1e-3
LR_OPACITY = 0.05
LR_SCALE = 5e-3
LR_ROTATION = 1e-3

# One Adam parameter group per Gaussian attribute, each with its own rate.
# The higher-order SH coefficients train 20x slower than the DC term.
_lr_table = (
    (model.xyz, LR_XYZ),
    (model.f_dc, LR_COLOR),
    (model.f_rest, LR_COLOR / 20),
    (model.opacity_raw, LR_OPACITY),
    (model.scales_raw, LR_SCALE),
    (model.rotations, LR_ROTATION),
)
optimizer = torch.optim.Adam(
    [{'params': _tensor, 'lr': _rate} for _tensor, _rate in _lr_table]
)

# Every scheduler step multiplies all learning rates by gamma.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.995)


def _load_target(filename):
    """Read one multi-view image as an (IMAGE_SIZE, IMAGE_SIZE, 3) float tensor in [0, 1]."""
    picture = Image.open(os.path.join(MULTIVIEW_DIR, filename)).convert('RGB')
    picture = picture.resize((IMAGE_SIZE, IMAGE_SIZE))
    return torch.tensor(np.array(picture) / 255.0, dtype=torch.float32, device=device)


# Supervision targets: at most one image per optimization camera.
if use_multiview and image_files:
    target_images = [_load_target(_name) for _name in image_files[:N_VIEWS]]
    print(f"‚úÖ Loaded {len(target_images)} target images for supervision")
else:
    target_images = None
    print("‚ö†Ô∏è No target images - using self-supervision only")

In [None]:
# Training loop
# `losses` keeps the scalar loss of every iteration for the plot below.
losses = []

print("üöÄ Starting optimization...")
pbar = tqdm(range(NUM_ITERATIONS))

for iteration in pbar:
    optimizer.zero_grad()

    # Sample random view (one camera per iteration)
    view_idx = np.random.randint(0, N_VIEWS)
    w2c = camera_poses[view_idx]

    # Render
    rendered, alpha = render_gaussians(model, w2c, projection, IMAGE_SIZE)

    # Compute loss
    if target_images is not None and view_idx < len(target_images):
        # Photometric loss with target
        # NOTE(review): pairs target image i (sorted filename order) with
        # camera pose i — assumes the uploaded views match the generated
        # orbit poses; confirm.
        target = target_images[view_idx]
        loss = F.mse_loss(rendered, target)
    else:
        # Self-supervision: encourage opacity and smooth colors
        # Also reached for views beyond the number of uploaded targets.
        loss = -alpha.mean() * 0.1  # Encourage visibility

        # Regularization
        # Penalizes the mean absolute raw (pre-exp) scale deviating from 1.0.
        loss += (model.scales_raw.abs().mean() - 1.0).abs() * 0.01  # Scale regularization

    # Backprop
    loss.backward()
    optimizer.step()
    # Learning rates decay once per iteration (not per epoch).
    scheduler.step()

    losses.append(loss.item())

    # The progress-bar postfix is refreshed only every 100 iterations.
    if iteration % 100 == 0:
        pbar.set_postfix({'loss': f'{loss.item():.4f}'})

print(f"\n‚úÖ Optimization complete! Final loss: {losses[-1]:.4f}")

In [None]:
# Visualize how the per-iteration loss evolved during optimization.
_loss_fig, _loss_ax = plt.subplots(figsize=(10, 4))
_loss_ax.plot(losses)
_loss_ax.set_xlabel('Iteration')
_loss_ax.set_ylabel('Loss')
_loss_ax.set_title('Optimization Progress')
_loss_ax.grid(True, alpha=0.3)
plt.show()

## 9️⃣ Render All Views & Create Video

In [None]:
import imageio

# Render a full orbit of the optimized model.
print("üé¨ Rendering optimized views...")

# A denser orbit (60 poses) than the training cameras gives a smooth video.
video_poses = generate_camera_poses(n_views=60, radius=2.0, elevation=30.0)


def _render_frame(pose):
    """Render one pose and convert it to an 8-bit RGB frame."""
    frame, _ = render_gaussians(model, pose, projection, IMAGE_SIZE)
    return (frame.cpu().numpy().clip(0, 1) * 255).astype(np.uint8)


with torch.no_grad():
    rendered_views = [_render_frame(pose) for pose in tqdm(video_poses)]

# Write all frames to an MP4 at 30 frames per second.
video_path = os.path.join(WORK_DIR, "optimized_render.mp4")
imageio.mimsave(video_path, rendered_views, fps=30)
print(f"‚úÖ Saved video: {video_path}")

# Preview eight frames spread evenly over the orbit on a 2x4 grid.
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
_frame_count = len(rendered_views)
for _cell, ax in enumerate(axes.flat):
    idx = _cell * _frame_count // 8
    ax.imshow(rendered_views[idx])
    ax.set_title(f"View {idx}")
    ax.axis('off')
plt.tight_layout()
plt.show()

## 🔟 Export Optimized PLY

In [None]:
def save_gaussian_ply(model, output_path):
    """Write the model's current Gaussians to ``output_path`` as a PLY file.

    Positions and normalized rotations come from the model's forward pass.
    SH coefficients, opacity and scales are written as the raw stored
    parameters (before sigmoid / exp), under the same field names the loader
    reads.

    Args:
        model: Model exposing ``f_dc``, ``f_rest``, ``opacity_raw`` and
            ``scales_raw``, whose call returns 'xyz' and 'rotations'.
        output_path: Destination file path.
    """
    with torch.no_grad():
        activated = model()

        positions = activated['xyz'].cpu().numpy()
        sh_dc = model.f_dc.cpu().numpy()
        sh_rest = model.f_rest.cpu().numpy()
        raw_opacity = model.opacity_raw.cpu().numpy()
        raw_scales = model.scales_raw.cpu().numpy()
        unit_quats = activated['rotations'].cpu().numpy()

    # Ordered (field name, per-point values) pairs; the order defines the
    # PLY property layout.
    columns = [(axis, positions[:, k]) for k, axis in enumerate(('x', 'y', 'z'))]
    columns += [(f'f_dc_{k}', sh_dc[:, k]) for k in range(3)]
    columns += [(f'f_rest_{k}', sh_rest[:, k]) for k in range(sh_rest.shape[1])]
    columns.append(('opacity', raw_opacity))
    columns += [(f'scale_{k}', raw_scales[:, k]) for k in range(3)]
    columns += [(f'rot_{k}', unit_quats[:, k]) for k in range(4)]

    # Structured array with one float32 field per property.
    vertex_table = np.zeros(
        len(positions), dtype=[(field, 'f4') for field, _ in columns]
    )
    for field, values in columns:
        vertex_table[field] = values

    from plyfile import PlyElement, PlyData
    vertex_element = PlyElement.describe(vertex_table, 'vertex')
    PlyData([vertex_element]).write(output_path)
    print(f"‚úÖ Saved: {output_path}")

# Save optimized PLY
# The file is written into WORK_DIR; the path is reused by the download cell.
optimized_ply_path = os.path.join(WORK_DIR, "optimized_gaussian.ply")
save_gaussian_ply(model, optimized_ply_path)

## 📥 Download Results

In [None]:
from google.colab import files

print("üì• Downloading results...")

# Send the optimized PLY first, then the orbit video, to the browser.
for _artifact in (optimized_ply_path, video_path):
    files.download(_artifact)

print("\n‚úÖ Downloads complete!")

---

## ✅ Next Steps

The optimized Gaussian Splats can now be used for:

1. **Rendering high-quality novel views**
2. **SDXL Enhancement** - Enhance specific views with diffusion
3. **MVCRM Refinement** - Back-project enhancements into the 3D representation

Continue with the **Master Pipeline notebook** for the full Glimpse3D workflow!