### Spherical Feature Extraction using s2cnn

In [1]:
import sys
sys.path.append('C:/Users/ustundag/GitHub/2D-3D-Semantics/s2cnn_TORCH/')

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils
import torchvision.transforms.functional as tfun
from torch.autograd import Variable

from s2cnn import SO3Convolution
from s2cnn import S2Convolution
from s2cnn import so3_integrate
from s2cnn import so3_near_identity_grid
from s2cnn import s2_near_identity_grid

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
class S2ConvNet_deep(nn.Module):
    def __init__(self, bandwidth = 30):
        super(S2ConvNet_deep, self).__init__()
        grid_s2    =  s2_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1)
        grid_so3_1 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/16, n_beta=1, max_gamma=2*np.pi, n_gamma=6)
        grid_so3_2 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/8,  n_beta=1, max_gamma=2*np.pi, n_gamma=6)
        grid_so3_3 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/4,  n_beta=1, max_gamma=2*np.pi, n_gamma=6)
        grid_so3_4 = so3_near_identity_grid(n_alpha=6, max_beta=np.pi/2,  n_beta=1, max_gamma=2*np.pi, n_gamma=6)
        grid_so3_5 = so3_near_identity_grid(n_alpha=6, max_beta=0.2,      n_beta=1)

        self.convolutional = nn.Sequential(
            S2Convolution(
                nfeature_in  = 3,
                nfeature_out = 8,
                b_in  = bandwidth,
                b_out = bandwidth,
                grid=grid_s2),
            nn.ReLU(inplace=False),
            SO3Convolution(
                nfeature_in  =  8,
                nfeature_out = 16,
                b_in  = bandwidth,
                b_out = bandwidth//2,
                grid=grid_so3_1),
            nn.ReLU(inplace=False),
            SO3Convolution(
                nfeature_in  = 16,
                nfeature_out = 16,
                b_in  = bandwidth//2,
                b_out = bandwidth//2,
                grid=grid_so3_2),
            nn.ReLU(inplace=False),
            SO3Convolution(
                nfeature_in  = 16,
                nfeature_out = 24,
                b_in  = bandwidth//2,
                b_out = bandwidth//4,
                grid=grid_so3_2),
            nn.ReLU(inplace=False),
            SO3Convolution(
                nfeature_in  = 24,
                nfeature_out = 24,
                b_in  = bandwidth//4,
                b_out = bandwidth//4,
                grid=grid_so3_3),
            nn.ReLU(inplace=False),
            SO3Convolution(
                nfeature_in  = 24,
                nfeature_out = 32,
                b_in  = bandwidth//4,
                b_out = bandwidth//8,
                grid=grid_so3_3),
            nn.ReLU(inplace=False),
            SO3Convolution(
                nfeature_in  = 32,
                nfeature_out = 64,
                b_in  = bandwidth//8,
                b_out = bandwidth//8,
                grid=grid_so3_4),
            nn.ReLU(inplace=False)
            )

    def forward(self, x):
        x = self.convolutional(x)
        #x = so3_integrate(x)
        #x = self.linear(x)
        return x

In [3]:
s2cnn = S2ConvNet_deep(bandwidth=64)
s2cnn.to(DEVICE)

S2ConvNet_deep(
  (convolutional): Sequential(
    (0): S2Convolution()
    (1): ReLU()
    (2): SO3Convolution()
    (3): ReLU()
    (4): SO3Convolution()
    (5): ReLU()
    (6): SO3Convolution()
    (7): ReLU()
    (8): SO3Convolution()
    (9): ReLU()
    (10): SO3Convolution()
    (11): ReLU()
    (12): SO3Convolution()
    (13): ReLU()
  )
)

In [4]:
"""
path = 'C:/Users/ustundag/Desktop/test_pano_rgb.png'
img = Image.open(path)
img = img.resize((128,128))
data = np.asarray(img, dtype=np.float32)
data = tfun.to_tensor(data)
data = data.unsqueeze_(0)
data = data[:,:3,:,:]
print(data.shape)
plt.imshow(img)
plt.show()
"""

"\npath = 'C:/Users/ustundag/Desktop/test_pano_rgb.png'\nimg = Image.open(path)\nimg = img.resize((128,128))\ndata = np.asarray(img, dtype=np.float32)\ndata = tfun.to_tensor(data)\ndata = data.unsqueeze_(0)\ndata = data[:,:3,:,:]\nprint(data.shape)\nplt.imshow(img)\nplt.show()\n"

In [5]:
"""
images  = data.to(DEVICE)
outputs = s2cnn(images)
print('outputs.shape: ', outputs.shape)
"""

"\nimages  = data.to(DEVICE)\noutputs = s2cnn(images)\nprint('outputs.shape: ', outputs.shape)\n"

In [6]:
"""
x = outputs.detach().cpu().numpy()
a = x[0, 0, :, :, 10]
print(a.shape)
plt.imshow(a, cmap='gray')
plt.show()
"""

"\nx = outputs.detach().cpu().numpy()\na = x[0, 0, :, :, 10]\nprint(a.shape)\nplt.imshow(a, cmap='gray')\nplt.show()\n"

### Extract and save features of 7 specific objects using semantics as masks

In [7]:
import assets.utils as u
VALID_OBJECTS = ('board','bookcase','chair','door','sofa','table','window')

import glob
from scipy.io import savemat, loadmat
from IPython.display import display, clear_output
import torchvision.transforms.functional as tfun

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def get_label(pix):
    labels = u.load_labels('C:/Users/ustundag/Github/2D-3D-Semantics/assets/semantic_labels.json')
    limit = len(labels)
    i = u.get_index(pix)
    if i < limit:
        instance_label = labels[i]
        instance_label_as_dict = u.parse_label(instance_label)
        label = instance_label_as_dict["instance_class"]
        return label
    return '<UNK>' # unknown in case index is out of bounds in "labels.json" file

def image2tensor(path, dim):
    img = Image.open(path)
    img = img.resize((dim,dim))
    img = np.asarray(img, dtype=np.float32)
    tensor = tfun.to_tensor(img)
    tensor = tensor[:3,:,:]
    tensor = tensor.unsqueeze_(0)
    return tensor

def save_features_and_labels(file):
    paths = glob.glob("C:\\Users\\ustundag\\GitHub\\2D-3D-Semantics\\area_3\\pano\\rgb\\*.png")
    features = []
    labels = []
    s2cnn = S2ConvNet_deep(bandwidth=64)
    s2cnn.to(DEVICE)
    
    i = 1
    for path in paths:
        clear_output(wait=True)
        tensor = image2tensor(path, dim=128) # 'dim' must be double of bandwidth
        images = tensor.to(DEVICE)
        fmap = s2cnn(images) # torch.Size([1, 64, 16, 16, 16])
        fmap = fmap.detach().cpu().numpy()
        fmap = fmap[0, :, :, :, 0] # torch.Size([64, 16, 16])
        fmap = fmap.reshape(fmap.shape[0], fmap.shape[1]*fmap.shape[2])

        # Replace 2 occurrences to find counterpart of RGB image as Semantic
        sem_file   = path.replace("rgb", "semantic", 2)
        sem_img    = np.asarray(Image.open(sem_file).resize((16,16)))
        print("sem_img.shape: ", sem_img.shape)
        sem_pixels = sem_img.reshape(sem_img.shape[0]*sem_img.shape[1], sem_img.shape[2])
        #unique_pixels = np.unique(sem_pixels, axis=0)
        valid_indexes = [[np.argwhere((sem_pixels == p).all(axis=1))[0,0], get_label(p)]
                            for p in sem_pixels
                            if get_label(p) in VALID_OBJECTS]
        # first value = feature index, second value = label
        for idx in valid_indexes:
            features.append(fmap[:, idx[0]])
            labels.append(VALID_OBJECTS.index(idx[1]))

        display(str(i) + " / 85")
        i += 1
    
    savemat(file,{'features': np.asarray(features),
                  'labels'  : np.asarray(labels)})

In [8]:
file = 'area_3_data_pano_s2cnn_dims_128_128_16_16.mat'
save_features_and_labels(file)



sem_img.shape:  (16, 16, 3)


'85 / 85'

In [9]:
data = loadmat("C:\\Users\\ustundag\\GitHub\\2D-3D-Semantics\\"+file)
features = data["features"]
labels   = data["labels"]

In [10]:
features.shape

(3141, 64)

In [11]:
labels.shape

(1, 3141)