# CubeDiff: Data Preparation

This notebook demonstrates the process of preparing panorama data for CubeDiff training.

1. Download sample panoramas
2. Convert equirectangular panoramas to cubemaps
3. Visualize the data
4. Create datasets and dataloaders
5. Create training and validation splits

In [None]:
import os
import sys
import numpy as np
import torch
from PIL import Image
from matplotlib import pyplot as plt
import requests
from tqdm.auto import tqdm
import json

# Make the parent directory importable from this notebook (the project
# modules imported below presumably live there — verify against repo layout).
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Import custom modules
from data.preprocessing import equirect_to_cubemap, preprocess_panorama_dataset
from data.dataset import CubemapDataset, get_dataloader

## 1. Download Sample Panoramas

Let's download a few sample HDR panoramas (2K EXR files) from Polyhaven and convert each one to JPG for easier handling.

In [None]:
# Create data directories
os.makedirs("../data/raw", exist_ok=True)
os.makedirs("../data/processed", exist_ok=True)

# Recent OpenCV builds refuse to decode EXR files unless this variable is set.
# Set it before cv2 is imported here; setdefault keeps any value the user
# already exported. NOTE(review): if cv2 was already imported by another
# module, confirm the flag is still honoured in your OpenCV version.
os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "1")
import cv2

# Sample panorama URLs from Polyhaven
sample_urls = [
    "https://dl.polyhaven.org/file/ph-assets/HDRIs/exr/2k/alps_field_2k.exr",
    "https://dl.polyhaven.org/file/ph-assets/HDRIs/exr/2k/empty_warehouse_01_2k.exr",
    "https://dl.polyhaven.org/file/ph-assets/HDRIs/exr/2k/sunflowers_2k.exr",
    "https://dl.polyhaven.org/file/ph-assets/HDRIs/exr/2k/venetian_crossroads_2k.exr",
    "https://dl.polyhaven.org/file/ph-assets/HDRIs/exr/2k/rural_asphalt_road_2k.exr"
]

# Download panoramas and convert each one to JPG.
# Both steps are guarded independently so the cell is safe to re-run: a file
# that downloaded fine but failed to convert is converted on the next run.
for url in tqdm(sample_urls, desc="Downloading panoramas"):
    filename = os.path.basename(url)
    filepath = os.path.join("../data/raw", filename)
    jpg_filepath = os.path.splitext(filepath)[0] + ".jpg"

    if not os.path.exists(filepath):
        # Stream into a temporary file and rename only on success, so an
        # interrupted download never leaves a truncated .exr that later runs
        # would mistake for a complete file.
        tmp_filepath = filepath + ".part"
        try:
            with requests.get(url, stream=True, timeout=60) as response:
                # Fail loudly on 4xx/5xx instead of saving an error page as .exr
                response.raise_for_status()
                with open(tmp_filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(tmp_filepath, filepath)
        finally:
            if os.path.exists(tmp_filepath):
                os.remove(tmp_filepath)

    if os.path.exists(jpg_filepath):
        continue

    # Convert EXR to JPG for easier handling.
    # Use OpenCV to read the EXR; imread returns None when decoding fails.
    exr_img = cv2.imread(filepath, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH)
    if exr_img is None:
        raise RuntimeError(
            f"OpenCV could not read {filepath}; the file may be corrupt or "
            "this OpenCV build may lack OpenEXR support"
        )

    # Simple tone mapping for HDR to LDR conversion:
    # halve the exposure, clip to [0, 1], then scale to 8-bit.
    exr_img = np.clip(exr_img * 0.5, 0, 1) * 255
    exr_img = exr_img.astype(np.uint8)

    # Convert BGR (OpenCV channel order) to RGB (PIL channel order)
    exr_img = cv2.cvtColor(exr_img, cv2.COLOR_BGR2RGB)

    # Save as JPG
    Image.fromarray(exr_img).save(jpg_filepath)
    print(f"Converted {filename} to {os.path.basename(jpg_filepath)}")

In [None]:
# Run the project's preprocessing over everything in the raw directory.
# Judging by the later cells, this writes one folder of face images per
# panorama under the output directory — confirm against data/preprocessing.py.
preprocess_options = {
    "input_dir": "../data/raw",
    "output_dir": "../data/processed/cubemaps",
    "face_size": 512,
}
preprocess_panorama_dataset(**preprocess_options)

In [None]:
# Hand-written text captions, one per sample panorama. Keys are the download
# filenames without the "_2k" resolution suffix and extension.
captions = {
    "alps_field":
        "A panoramic view of the Swiss Alps with green fields under a clear blue sky",
    "empty_warehouse_01":
        "An empty industrial warehouse with concrete floors and metal beams",
    "sunflowers":
        "A field of golden sunflowers stretching to the horizon under a sunny sky",
    "venetian_crossroads":
        "A scenic Italian street intersection in Venice with historic buildings",
    "rural_asphalt_road":
        "A rural asphalt road cutting through countryside fields",
}

# Serialize first, then write the captions JSON next to the processed data
captions_json = json.dumps(captions, indent=4)
with open("../data/processed/captions.json", "w") as captions_out:
    captions_out.write(captions_json)

## 2. Visualize the Cubemap Faces

In [None]:
# Visualize the cubemap faces for one panorama
sample_pano = "sunflowers"
sample_pano_dir = os.path.join("../data/processed/cubemaps", sample_pano)

# Face order used for loading and for the 2x3 plot grid (row-major)
face_names = ['front', 'right', 'back', 'left', 'top', 'bottom']
face_images = []

for face_name in face_names:
    face_path = os.path.join(sample_pano_dir, f"{face_name}.jpg")
    # Image.open is lazy and keeps the file handle open; the context manager
    # closes it as soon as the pixels have been copied into a NumPy array.
    with Image.open(face_path) as face_img:
        face_images.append(np.array(face_img))

# Plot the cubemap faces, one per subplot
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
for ax, face_name, face_img in zip(axs.flat, face_names, face_images):
    ax.imshow(face_img)
    ax.set_title(face_name)
    ax.axis('off')
plt.tight_layout()
plt.show()

## 3. Test the Dataset

In [None]:
# Build the dataset from the processed cubemaps and the captions file
test_dataset = CubemapDataset(
    captions_file="../data/processed/captions.json",
    data_dir="../data/processed/cubemaps",
)

num_samples = len(test_dataset)
print(f"Dataset size: {num_samples}")

# Pull the first item and inspect what the dataset yields; the prints below
# read its 'faces' and 'caption' entries.
sample = test_dataset[0]
print(f"Sample keys: {sample.keys()}")
print(f"Faces tensor shape: {sample['faces'].shape}")
print(f"Caption: {sample['caption']}")

In [None]:
# Show the faces of the dataset sample as the model would receive them
faces = sample['faces']
# Move the second axis last so each face can be passed to imshow
# (assumes faces is (face, channel, height, width) — TODO confirm).
faces_channels_last = faces.permute(0, 2, 3, 1).numpy()
# Undo the presumed [-1, 1] normalization, then clamp to the displayable range
faces_np = np.clip(faces_channels_last * 0.5 + 0.5, 0, 1)

fig, axs = plt.subplots(2, 3, figsize=(15, 10))
for ax, title, image in zip(axs.flat, face_names, faces_np):
    ax.imshow(image)
    ax.set_title(title)
    ax.axis('off')
fig.suptitle(f"Caption: {sample['caption']}")
fig.tight_layout()
plt.show()

## 4. Test the Dataloader

In [None]:
# Build a dataloader over the same processed data, two panoramas per batch
test_dataloader = get_dataloader(
    batch_size=2,
    captions_file="../data/processed/captions.json",
    data_dir="../data/processed/cubemaps",
)

# Fetch a single batch and inspect its 'faces' and 'caption' entries
batch_iterator = iter(test_dataloader)
batch = next(batch_iterator)
print(f"Batch keys: {batch.keys()}")
print(f"Batch faces shape: {batch['faces'].shape}")
print(f"Batch captions: {batch['caption']}")

## 5. Create Training and Validation Splits

In [None]:
# For a real training scenario, create train/val splits
from torch.utils.data import random_split

# Hold out 20% of the samples for validation
val_ratio = 0.2
dataset_size = len(test_dataset)

# int() truncates, so with fewer than 5 samples (e.g. a download failed) the
# validation split would be empty. Keep at least one validation sample
# whenever there is more than one sample to split.
val_size = int(dataset_size * val_ratio)
if val_size == 0 and dataset_size > 1:
    val_size = 1
train_size = dataset_size - val_size

# Create random splits with a fixed seed for reproducibility
train_dataset, val_dataset = random_split(
    test_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

print(f"Full dataset size: {dataset_size}")
print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")

## 6. Summary and Next Steps

We've successfully:
1. Downloaded sample panoramas from Polyhaven
2. Converted equirectangular panoramas to cubemap faces
3. Created a dataset with captions
4. Visualized the cubemap faces
5. Tested the dataset and dataloader
6. Created training and validation splits

Next steps:
1. Implement the CubeDiff model architecture
2. Set up the training pipeline
3. Train the model on this dataset
4. Evaluate the results

For a real training scenario, you would want to collect more panoramas to expand the dataset. The paper mentions using Polyhaven, SUN360, and other sources to build a larger dataset. Even with a small dataset, the LoRA fine-tuning approach should be able to produce decent results.