# Dataset Preparation

- In this notebook we will download the following datasets.
    - Princeton Segmentation Benchmark
    - ShapeNet Parts
    - COSEG
    - Label Meshes
- Create the train/val/test splits.
- Visualize samples from the datasets.

In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import trimesh
import os
import os.path

from util.visualization_utils import * 

1. ShapeNet Parts

In [2]:
class SimCLRTrainDataTransform(object):
    """
    Paired-view transform wrapper for SimCLR.

    The given list of transforms is composed into a single pipeline. Calling
    the wrapper runs that pipeline twice on the same sample and returns both
    results as a tuple.
    """

    def __init__(self, data_transforms) -> None:
        # ``transforms`` must already be in the notebook namespace when an
        # instance is created (it is imported from torchvision in a later cell).
        pipeline = transforms.Compose(data_transforms)
        self.data_transforms = pipeline
        print(pipeline)

    def __call__(self, sample):
        augment = self.data_transforms
        # Two separate passes over the same input: first view, then second view.
        return augment(sample), augment(sample)


In [3]:
from datasets.shapenet_parts.shapenet_parts import ShapeNetParts
from util.visualization_utils import visualize_pointcloud
from matplotlib import cm, colors
import numpy as np
from torchvision import transforms, datasets
from augmentations.augmentations import Rescale, Flip, GaussianWhiteNoise, CutOut, Rotation


# Augmentation classes to preview, one per loop iteration. A ``None`` entry
# would skip the SimCLR wrapper and hand ``transforms=None`` to the dataset.
transformations = [
#     None,
    GaussianWhiteNoise,
    Rescale,
    Flip,
    CutOut,
    Rotation
]

for transform in transformations:
    print(str(transform))
    if transform:
        # p=1: presumably the application probability, so the augmentation is
        # always visible in the preview -- TODO confirm against augmentations.py
        tranformation_compositions = SimCLRTrainDataTransform([transform(p=1)])
    else:
        tranformation_compositions = None

    # Create the train/val/test datasets with the same transform pipeline.
    train_dataset = ShapeNetParts('train', transforms=tranformation_compositions)
    val_dataset = ShapeNetParts('val',  transforms=tranformation_compositions)
    test_dataset = ShapeNetParts('test',  transforms=tranformation_compositions)

    # Test lengths
    print(f'Length of train set: {len(train_dataset)}')

    # Sample 0 unpacks as ((view_1, view_2), labels); only the first view is shown.
    (train_sample, _), y = train_dataset[0]

    # Number of points in the first row of the sample: torch.Size([2500]) in the
    # recorded run, fewer after CutOut.
    print(train_sample[0].shape)
    # NOTE: despite the "Class" label this prints the second row of the sample
    # (a float tensor in the recorded run), not a class id.
    print(f"Class = {train_sample[1]}")
    print(train_sample[0].dtype)

    shape_points = train_sample.T
    seg = np.asarray(y)

    # Normalise the per-point labels to [0, 1] for the colormap. When every
    # point carries the same label the range is zero; map everything to 0.0
    # instead of dividing by zero and producing NaN colours.
    seg_min, seg_max = seg.min(), seg.max()
    label_range = seg_max - seg_min
    if label_range > 0:
        point_labels = (seg - seg_min) / label_range
    else:
        point_labels = np.zeros(seg.shape, dtype=float)

    point_colors = cm.get_cmap('hsv')(point_labels)[:, :3]
    # Pack the 8-bit R, G, B channels into one integer per point. Channels span
    # 0..255, so the weights must be powers of 256; base-255 weights let
    # distinct colours collide (e.g. (0, 255, 0) and (1, 0, 0)).
    # Assumes visualize_pointcloud expects packed 0xRRGGBB ints -- TODO confirm.
    point_colors = np.sum((point_colors * 255).astype(int) * [256 * 256, 256, 1], axis=1)
    visualize_pointcloud(shape_points.numpy(), colors=point_colors, point_size=0.025, flip_axes=True)


<class 'augmentations.augmentations.GaussianWhiteNoise'>
Compose(
    <augmentations.augmentations.GaussianWhiteNoise object at 0x7fbf1544bbb0>
)
Length of train set: 1958
torch.Size([2500])
Class = tensor([-0.0621,  0.0856, -0.0410,  ..., -0.1387,  0.0478, -0.0971])
torch.float32




Output()

<class 'augmentations.augmentations.Rescale'>
Compose(
    <augmentations.augmentations.Rescale object at 0x7fbf14831eb0>
)
Length of train set: 1958
torch.Size([2500])
Class = tensor([ 0.0219, -0.0502,  0.0446,  ...,  0.0501,  0.0039,  0.1792])
torch.float32


Output()

<class 'augmentations.augmentations.Flip'>
Compose(
    <augmentations.augmentations.Flip object at 0x7fbf1544bb80>
)
Length of train set: 1958
torch.Size([2500])
Class = tensor([-0.1033, -0.0138, -0.1133,  ...,  0.0074, -0.1150, -0.0980])
torch.float32


Output()

<class 'augmentations.augmentations.CutOut'>
Compose(
    <augmentations.augmentations.CutOut object at 0x7fbf14659f70>
)
Length of train set: 1958
torch.Size([2228])
Class = tensor([-0.0208,  0.2425,  0.0372,  ..., -0.0914,  0.2446,  0.0025])
torch.float32


Output()

<class 'augmentations.augmentations.Rotation'>
Compose(
    <augmentations.augmentations.Rotation object at 0x7fbf1475bf70>
)
Length of train set: 1958
torch.Size([2500])
Class = tensor([ 0.2779,  0.0513,  0.2622,  ..., -0.0838, -0.0562, -0.0598])
torch.float32


Output()

In [5]:
from datasets.shapenet_parts.shapenet_parts import ShapeNetParts
from util.visualization_utils import visualize_pointcloud
from matplotlib import cm, colors
import numpy as np
from torchvision import transforms, datasets
from augmentations.augmentations import Rescale, Flip, GaussianWhiteNoise, CutOut, Rotation


# Augmentation classes to preview, one per loop iteration. A ``None`` entry
# would skip the SimCLR wrapper and hand ``transforms=None`` to the dataset.
transformations = [
#     None,
    GaussianWhiteNoise,
    Rescale,
    Flip,
    CutOut,
    Rotation
]

for transform in transformations:
    print(str(transform))
    if transform:
        # p=1: presumably the application probability, so the augmentation is
        # always visible in the preview -- TODO confirm against augmentations.py
        tranformation_compositions = SimCLRTrainDataTransform([transform(p=1)])
    else:
        tranformation_compositions = None

    # Create a dataset with train split, overriding the per-sample point count
    # (8**3 = 512; the recorded run shows 512-point samples, fewer after CutOut).
    train_dataset = ShapeNetParts('train', transforms=tranformation_compositions)
    train_dataset.npoints = 8**3

    # Test lengths
    print(f'Length of train set: {len(train_dataset)}')

    # Sample 0 unpacks as ((view_1, view_2), labels); only the first view is shown.
    (train_sample, _), y = train_dataset[0]

    # Number of points in the first row of the sample.
    print(train_sample[0].shape)

    shape_points = train_sample.T
    seg = np.asarray(y)

    # Normalise the per-point labels to [0, 1] for the colormap. When every
    # point carries the same label the range is zero; map everything to 0.0
    # instead of dividing by zero and producing NaN colours.
    seg_min, seg_max = seg.min(), seg.max()
    label_range = seg_max - seg_min
    if label_range > 0:
        point_labels = (seg - seg_min) / label_range
    else:
        point_labels = np.zeros(seg.shape, dtype=float)

    point_colors = cm.get_cmap('hsv')(point_labels)[:, :3]
    # Pack the 8-bit R, G, B channels into one integer per point. Channels span
    # 0..255, so the weights must be powers of 256; base-255 weights let
    # distinct colours collide (e.g. (0, 255, 0) and (1, 0, 0)).
    # Assumes visualize_pointcloud expects packed 0xRRGGBB ints -- TODO confirm.
    point_colors = np.sum((point_colors * 255).astype(int) * [256 * 256, 256, 1], axis=1)
    visualize_pointcloud(shape_points.numpy(), colors=point_colors, point_size=0.025, flip_axes=True)


<class 'augmentations.augmentations.GaussianWhiteNoise'>
Compose(
    <augmentations.augmentations.GaussianWhiteNoise object at 0x7fbf148533a0>
)
Length of train set: 1958
torch.Size([512])


Output()

<class 'augmentations.augmentations.Rescale'>
Compose(
    <augmentations.augmentations.Rescale object at 0x7fbf14624130>
)
Length of train set: 1958
torch.Size([512])


Output()

<class 'augmentations.augmentations.Flip'>
Compose(
    <augmentations.augmentations.Flip object at 0x7fbf14667e80>
)
Length of train set: 1958
torch.Size([512])


Output()

<class 'augmentations.augmentations.CutOut'>
Compose(
    <augmentations.augmentations.CutOut object at 0x7fbf1473fb50>
)
Length of train set: 1958
torch.Size([352])


Output()

<class 'augmentations.augmentations.Rotation'>
Compose(
    <augmentations.augmentations.Rotation object at 0x7fbf1441fd90>
)
Length of train set: 1958
torch.Size([512])


Output()