# Dataset Preparation

- In this notebook we will download the following datasets.
    - Princeton Segmentation Benchmark
    - ShapeNet Parts
    - COSEG
    - Label Meshes
- Create the train/val/test splits.
- Visualize samples from the datasets.

In [2]:
import os
os.chdir('..')

In [3]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import trimesh
import os
import os.path

from util.visualization_utils import * 
from transforms import SimCLRTrainDataTransform

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  stdout_func(


1. ShapeNet Parts

## 2500 Data Points

In [34]:
from datasets.shapenet_parts.shapenet_parts import ShapeNetParts
from util.visualization_utils import visualize_pointcloud
from matplotlib import cm, colors
import numpy as np
from torchvision import transforms, datasets
from augmentations.augmentations import Rescale, Flip, GaussianNoise, Rotation


transformations = [
#     None,
    GaussianNoise,
    Rescale,
    Flip,
    Rotation
]

for transform in transformations:
    print(str(transform))
    if transform:  
        tranformation_compositions = SimCLRTrainDataTransform([transform(p=1)])
    else:
        tranformation_compositions = None
        

    # Create a dataset with train split
    train_dataset = ShapeNetParts('train', transforms=tranformation_compositions)
    val_dataset = ShapeNetParts('val',  transforms=tranformation_compositions)
    test_dataset = ShapeNetParts('test',  transforms=tranformation_compositions)


    # Test lengths
    print(f'Length of train set: {len(train_dataset)}') 

    # Get sample at index 0
    (train_sample,_,_) , (y,_,_), class_id = train_dataset[0]

    print(train_sample.shape)  # Expected output (1, 32, 32, 32) (the leading 1 is important for later)
    print(f"Class = {train_sample[1]}")  # Expected output: Scalar value 0
    print(train_sample.dtype)
    
    shape_points,seg = train_sample.T, y
    point_labels = (seg - min(seg)) / (max(seg) - min(seg))
    point_colors = cm.get_cmap('hsv')(point_labels)[:, :3]
    point_colors = np.sum((point_colors * 255).astype(int) * [255*255, 255, 1], axis=1)
    visualize_pointcloud(shape_points.numpy(), colors=point_colors, point_size=0.025, flip_axes=True)


<class 'augmentations.augmentations.GaussianNoise'>
Length of train set: 12137
torch.Size([3, 2500])
Class = tensor([ 0.0570,  0.0192,  0.2792,  ...,  0.0501, -0.0789,  0.2620])
torch.float32




Output()

<class 'augmentations.augmentations.Rescale'>
Length of train set: 12137
torch.Size([3, 2500])
Class = tensor([-0.1041, -0.0254, -0.0731,  ..., -0.0670, -0.0801,  0.0812])
torch.float32


Output()

<class 'augmentations.augmentations.Flip'>
Length of train set: 12137
torch.Size([3, 2500])
Class = tensor([-0.0314,  0.0932, -0.0097,  ..., -0.0972,  0.0401,  0.0727])
torch.float32


Output()

<class 'augmentations.augmentations.Rotation'>
Length of train set: 12137
torch.Size([3, 2500])
Class = tensor([-0.0861,  0.1106, -0.0101,  ...,  0.1756,  0.1142, -0.1155])
torch.float32


Output()

## 512 Data Points

In [36]:
from datasets.shapenet_parts.shapenet_parts import ShapeNetParts
from util.visualization_utils import visualize_pointcloud
from matplotlib import cm, colors
import numpy as np
from torchvision import transforms, datasets
from augmentations.augmentations import Rescale, Flip, GaussianNoise, Rotation,RandomCuboid, RandomDrop


transformations = [
    None,
  #   GaussianWhiteNoise,
  #   Rescale,
 #     Flip,
 #     Rotation,
 RandomCuboid,
#RandomDrop,
#     CutOut
]

for transform in transformations:
    print(str(transform))
    if transform:  
        tranformation_compositions = SimCLRTrainDataTransform([transform(p=1)])
    else:
        tranformation_compositions = None
    
    # Create a dataset with train split
    train_dataset = ShapeNetParts('train', transforms=tranformation_compositions)
    train_dataset.npoints = 2500

    # Test lengths
    print(f'Length of train set: {len(train_dataset)}') 

    # Get sample at index 0
    if train_dataset.transforms:
        (train_sample,_,_) , (y,_,_), class_id = train_dataset[6000]
    else:
        print('asd')
        train_sample1, y1,_ = train_dataset[6000]
        train_sample2 = None

    print(f'sample shape: {train_sample1[0].shape}')  # Expected output (1, 32, 32, 32) (the leading 1 is important for later)
#     print(f"Class = {train_sample[1]}")  # Expected output: Scalar value 0
    print(f'train_sample1 shape: {train_sample1.shape}')  # Expected output (1, 32, 32, 32) (the leading 1 is important for later)
    print(f'y1 shape: {y1.shape}')  # Expected output (1, 32, 32, 32) (the leading 1 is important for later)

    shape_points,seg = train_sample1.T, y1
    point_labels = (seg - min(seg)) / (max(seg) - min(seg))
    point_colors = cm.get_cmap('hsv')(point_labels)[:, :3]
    point_colors = np.sum((point_colors * 255).astype(int) * [255*255, 255, 1], axis=1)
    visualize_pointcloud(shape_points.numpy(), colors=point_colors, point_size=0.025, flip_axes=True)

    if train_sample2 is not None:
        print(f'train_sample2 shape: {train_sample2.shape}')  # Expected output (1, 32, 32, 32) (the leading 1 is important for later)
        print(f'y2 shape: {y2.shape}') 
        shape_points,seg = train_sample2.T, y2
        point_labels = (seg - min(seg)) / (max(seg) - min(seg))
        point_colors = cm.get_cmap('hsv')(point_labels)[:, :3]
        point_colors = np.sum((point_colors * 255).astype(int) * [255*255, 255, 1], axis=1)
        visualize_pointcloud(shape_points.numpy(), colors=point_colors, point_size=0.025, flip_axes=True)


None
Length of train set: 12137
asd
sample shape: torch.Size([2500])
train_sample1 shape: torch.Size([3, 2500])
y1 shape: torch.Size([2500])


Output()

<class 'augmentations.augmentations.RandomCuboid'>
Length of train set: 12137
sample shape: torch.Size([2500])
train_sample1 shape: torch.Size([3, 2500])
y1 shape: torch.Size([2500])


Output()

In [68]:
np.unique(train_sample1).shape

(1019,)