# Dataset Preparation

In this notebook we will:

- Download the following datasets:
    - Princeton Segmentation Benchmark
    - ShapeNet Parts
    - COSEG
    - Label Meshes
- Create the train/val/test splits.
- Visualize samples from the datasets.

In [21]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import trimesh
import os
import os.path

from util.visualization_utils import * 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. ShapeNet Parts

In [94]:
from datasets.shapenet_parts.shapenet_parts import ShapeNetParts
from util.visualization_utils import visualize_pointcloud
from matplotlib import cm, colors
import numpy as np
from torchvision import transforms, datasets
from augmentations.augmentations import Rescale, Flip, GaussianWhiteNoise, CutOut

# Build the augmentation pipeline under its own name so that the imported
# `torchvision.transforms` module is not shadowed for cells that run later.
noise_transform = transforms.Compose([
    GaussianWhiteNoise(),
])

# Create one dataset per split.
# NOTE(review): the noise augmentation is also passed to the val/test splits,
# exactly as before — confirm this is intended before using them for evaluation.
train_dataset = ShapeNetParts('train', transform=noise_transform)
val_dataset = ShapeNetParts('val', transform=noise_transform)
test_dataset = ShapeNetParts('test', transform=noise_transform)

# Report the size of the train split
print(f'Length of train set: {len(train_dataset)}')

# Get sample at index 0
train_sample = train_dataset[0]
print(train_sample[0].shape)  # torch.Size([2500, 3]) in the recorded run
print(f"Class = {train_sample[1]}")  # a tensor of labels (not a scalar) in the recorded run

shape_points, seg = train_dataset.get_point_cloud_with_labels(index=0)

# Normalise the labels into the half-open interval [0, 1). `hsv` is a cyclic
# colormap, so mapping the largest label to exactly 1.0 would give it (almost)
# the same red as the smallest label. Dividing by (range + 1) keeps every label
# distinct and also avoids a division by zero when only one label is present.
seg_values = np.asarray(seg, dtype=float)
seg_min = seg_values.min()
label_span = seg_values.max() - seg_min + 1
point_labels = (seg_values - seg_min) / label_span

# `cm.get_cmap` is deprecated in recent matplotlib releases; the colormap is
# also available directly as an attribute of `matplotlib.cm`.
point_rgb = (cm.hsv(point_labels)[:, :3] * 255).astype(int)

# Pack the three 8-bit channels into one integer per point (0xRRGGBB). The
# weights must be powers of 256; 255-based weights let channels bleed into
# each other.
# NOTE(review): assumes visualize_pointcloud expects packed 0xRRGGBB integers
# (the k3d convention) — confirm against util.visualization_utils.
point_colors = np.sum(point_rgb * [256 * 256, 256, 1], axis=1)
visualize_pointcloud(shape_points.numpy(), colors=point_colors, point_size=0.025, flip_axes=True)


Length of train set: 1958
point_set: torch.Size([2500, 3])
seg: torch.Size([2500])
torch.Size([2500, 3])
Class = tensor([1, 1, 1,  ..., 4, 1, 1])
point_set: torch.Size([2500, 3])
seg: torch.Size([2500])


Output()