## Notebook Code to read PascalVOC2011 images and extract superpixels to store as pickle files




In [None]:
import os

import numpy as np
import scipy.io as sio
import torch
from PIL import Image
from torch.utils import data

import random
import scipy
import pickle
from skimage.segmentation import slic
from skimage.future import graph
from skimage import filters, color

import scipy.ndimage
import scipy.spatial
from scipy.spatial.distance import cdist

import time
import dgl
import torch
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib

### Automatically downloading the dataset from the link referenced in this repo
1. The SBD (Semantic Boundaries Dataset) is obtained as described at (https://github.com/shelhamer/fcn.berkeleyvision.org/tree/master/data/pascal).
2. Extracting the downloaded archive yields the `benchmark_RELEASE` folder.  
3. The `benchmark_RELEASE` folder is placed in the current directory.

The code below was adapted from https://github.com/zijundeng/pytorch-semantic-segmentation

##### The SBD currently contains annotations from 11355 images taken from the PASCAL VOC 2011 dataset.

In [None]:
if not os.path.isfile('benchmark.tgz'):
    print('downloading..')
    !wget http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz
    !tar -xzf benchmark.tgz
else:
    print('File already downloaded')

In [None]:
# Folder in which the extracted `benchmark_RELEASE` directory is looked up.
root = '.'

"""
color map
0=background, 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle # 6=bus, 7=car, 8=cat, 9=chair, 10=cow, 11=diningtable,
12=dog, 13=horse, 14=motorbike, 15=person # 16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor
"""

# The colour map above enumerates ids 0 through 20, i.e. 21 classes in total.
num_classes = 21

# NOTE(review): presumably the mask value for pixels that should be ignored;
# it is not referenced anywhere else in the visible code - confirm before use.
ignore_label = 255

class VOC_SBD_Images(data.Dataset):
    """In-memory dataset of SBD images paired with their class masks.

    On construction, the image/mask paths of the requested split are read
    from `<root>/benchmark_RELEASE/dataset/<mode>.txt`, and every image and
    mask is loaded eagerly into `img_list` / `mask_list` as numpy arrays.

    Attributes are documented inline in `__init__`.
    """

    def __init__(self, mode):
        """Load the whole split into memory.

        Args:
            mode: either 'train' or 'val'.

        Raises:
            RuntimeError: if the split list yields no entries.
            AssertionError: if `mode` is not 'train' or 'val'.
        """
        # List of (image_path, mask_path) tuples for the split.
        self.imgs = self.read_dataset(mode)
        if not self.imgs:
            raise RuntimeError('Found 0 images, please check the data set')
        self.mode = mode
        # RGB images as uint8 arrays, in the same order as `self.imgs`.
        self.img_list = []
        # Segmentation masks as uint8 arrays, in the same order as `self.imgs`.
        self.mask_list = []
        self._pack_images_masks()

    def read_dataset(self, mode):
        """Return the (image_path, mask_path) pairs listed for `mode`.

        Args:
            mode: either 'train' or 'val'; selects `<mode>.txt` in the
                dataset folder under the module-level `root`.

        Returns:
            A list of `(jpg_path, mat_path)` tuples, one per non-empty line
            of the split file, in file order.
        """
        # in this paper, we train on the train set and evaluate on the val set
        assert mode in ['train', 'val']
        dataset_dir = os.path.join(root, 'benchmark_RELEASE', 'dataset')
        img_path = os.path.join(dataset_dir, 'img')
        mask_path = os.path.join(dataset_dir, 'cls')

        # `with` closes the list file deterministically instead of leaving
        # the handle to the garbage collector.
        with open(os.path.join(dataset_dir, mode + '.txt')) as list_file:
            names = [line.strip() for line in list_file]

        # Blank lines (e.g. a trailing empty line) would otherwise produce
        # bogus '.jpg' / '.mat' paths, so they are skipped.
        return [
            (os.path.join(img_path, name + '.jpg'),
             os.path.join(mask_path, name + '.mat'))
            for name in names
            if name
        ]

    def _pack_images_masks(self):
        """Read every image and mask in `self.imgs` into memory."""
        for img_path, mask_path in self.imgs:
            # The context manager releases the image file handle as soon as
            # the pixel data has been copied into a numpy array.
            with Image.open(img_path) as img_file:
                img = np.array(img_file.convert('RGB'))

            # The .mat file stores the class mask under GTcls.Segmentation.
            mask = sio.loadmat(mask_path)['GTcls']['Segmentation'][0][0]

            self.img_list.append(img)
            # Direct uint8 conversion; equivalent to the former round trip
            # through a PIL image, without the extra copy.
            self.mask_list.append(np.ascontiguousarray(mask, dtype=np.uint8))

    def __getitem__(self, index):
        """Return the `(image, mask)` numpy arrays stored at `index`."""
        return self.img_list[index], self.mask_list[index]

    def __len__(self):
        """Return the number of (image, mask) pairs in the split."""
        return len(self.imgs)

In [None]:
def _superpixel_features(img, superpixels, sp_order):
    """Compute per-superpixel channel statistics and centroids.

    Args:
        img: float image with an explicit channel axis, shape (H, W, C).
        superpixels: label map returned by SLIC for `img`.
        sp_order: superpixel labels, in the order the output rows should follow.

    Returns:
        Tuple `(sp_intensity, sp_coord)` of float32 arrays. Each row of
        `sp_intensity` is `[mean, std, max, min]` with every statistic given
        per channel; each row of `sp_coord` is the superpixel's centre of
        mass (row, col).
    """
    n_ch = 1 if img.shape[2] == 1 else 3

    sp_intensity, sp_coord = [], []
    for seg in sp_order:
        mask = (superpixels == seg).squeeze()
        # Rows: mean, std, max, min; columns: channels.
        stats = np.zeros((4, n_ch))
        for c in range(n_ch):
            # Select the superpixel's pixels once and reuse them for all
            # four statistics instead of re-masking the channel each time.
            values = img[:, :, c][mask]
            stats[0, c] = np.mean(values)
            stats[1, c] = np.std(values)
            stats[2, c] = np.max(values)
            stats[3, c] = np.min(values)
        # Row-major flattening gives [means..., stds..., maxs..., mins...].
        sp_intensity.append(stats.reshape(-1))
        # `scipy.ndimage.center_of_mass` is the public entry point; the
        # `scipy.ndimage.measurements` namespace is deprecated.
        sp_coord.append(np.array(scipy.ndimage.center_of_mass(mask)))  # row, col

    return np.array(sp_intensity, np.float32), np.array(sp_coord, np.float32)


def process_image_slic(params):
    """Extract SLIC superpixels, their features and a boundary RAG for one image.

    Args:
        params: tuple `(img, index, n_images, args, to_print, shuffle)` where
            `img` is a uint8 image array, `index` / `n_images` are used for
            progress messages, `args` is a dict providing 'n_sp',
            'compactness' and 'split', `to_print` enables a progress line
            every 100 images, and `shuffle` randomly permutes the superpixel
            order.

    Returns:
        Tuple `(sp_intensity, sp_coord, sp_order, superpixels, g)`:
        per-superpixel statistics (float32), per-superpixel centre of mass
        (float32, row/col), the superpixel labels in output order (int32),
        the SLIC label map, and the region adjacency graph built from
        Sobel edges (or a zero-weighted complete graph as a placeholder
        when `rag_boundary` raises ValueError).

    Raises:
        AssertionError: if `img` is not uint8, if no valid number of
            superpixels is obtained, or if the labels are not 0..n-1.
    """
    img, index, n_images, args, to_print, shuffle = params
    img_original = img

    assert img.dtype == np.uint8, img.dtype
    img = (img / 255.).astype(np.float32)

    n_sp_extracted = args['n_sp'] + 1  # number of actually extracted superpixels (can be different from requested in SLIC)

    # number of superpixels we ask to extract (larger to extract more superpixels - closer to the desired n_sp)
    n_sp_query = args['n_sp'] + 50

    while n_sp_extracted > args['n_sp']:
        superpixels = slic(img, n_segments=n_sp_query, compactness=args['compactness'], multichannel=len(img.shape) > 2)
        sp_indices = np.unique(superpixels)
        n_sp_extracted = len(sp_indices)
        n_sp_query -= 1  # reducing the number of superpixels until we get <= n superpixels

    assert n_sp_extracted <= args['n_sp'] and n_sp_extracted > 0, (args['split'], index, n_sp_extracted, args['n_sp'])

    # make sure superpixel indices are numbers from 0 to n-1
    assert n_sp_extracted == np.max(superpixels) + 1, ('superpixel indices', np.unique(superpixels))

    # Creating region adjacency graph based on boundary
    gimg = color.rgb2gray(img_original)
    edges = filters.sobel(gimg)

    try:
        g = graph.rag_boundary(superpixels, edges)
    except ValueError:  # Error thrown when graph size is perhaps 1
        print("ignored graph")
        g = nx.complete_graph(sp_indices)  # so ignoring these for now and placing dummy info
        nx.set_edge_attributes(g, 0., "weight")
        nx.set_edge_attributes(g, 0, "count")

    if shuffle:
        ind = np.random.permutation(n_sp_extracted)
    else:
        ind = np.arange(n_sp_extracted)

    sp_order = sp_indices[ind].astype(np.int32)
    if len(img.shape) == 2:
        # Give grayscale images an explicit channel axis so the feature
        # code can always index img[:, :, c].
        img = img[:, :, None]

    sp_intensity, sp_coord = _superpixel_features(img, superpixels, sp_order)

    if to_print and (index % 100 == 0):
        print('image={}/{}, shape={}, min={:.2f}, max={:.2f}, n_sp={}'.format(index + 1, n_images, img.shape,
                                                                              img.min(), img.max(), sp_intensity.shape[0]))
    return sp_intensity, sp_coord, sp_order, superpixels, g

In [None]:
def write_superpixels_data(args):
    """Extract superpixel data for one split and pickle it to disk.

    Uses the module-level `train_set` / `val_set` (selected by
    `args['split']`) and `process_image_slic`. Three pickle files are
    written to `args['out_dir']`, all sharing the prefix
    `<dataset>_<n_sp>sp_<compactness>cmpt_<split>`:
      * `<prefix>.pkl`: `(sp_node_labels, sp_data)`
      * `<prefix>_superpixels.pkl`: the SLIC label maps
      * `<prefix>_rag_boundary_graphs.pkl`: the region adjacency graphs

    Args:
        args: dict with keys 'split', 'seed', 'n_sp', 'compactness',
            'out_dir' and 'dataset'.
    """
    print("Extracting for {} split..".format(args['split']))
    data_set = train_set if args['split'] == 'train' else val_set

    random.seed(args['seed'])
    np.random.seed(args['seed'])

    n_images = len(data_set)
    sp_data, superpixels, rag_boundary_graphs, sp_node_labels = [], [], [], []

    # Images and masks are walked in lockstep so that a skipped image can
    # never shift the pairing between superpixel data and ground truth.
    for i, (img, mask) in enumerate(zip(data_set.img_list, data_set.mask_list)):
        slic_out = process_image_slic((img, i, n_images, args, True, False))
        if slic_out is None:
            continue
        sp_intensity, sp_coord, sp_order, sp_map, rag = slic_out

        # NODE LABELING: each superpixel node takes the ground-truth label
        # of the mask pixel nearest to its centre of mass. The coordinates
        # are (row, col), so they index the mask directly.
        rows = np.rint(sp_coord[:, 0]).astype(int)
        cols = np.rint(sp_coord[:, 1]).astype(int)
        sp_node_labels.append(mask[rows, cols].astype(np.int32))

        sp_data.append((sp_intensity, sp_coord, sp_order))
        superpixels.append(sp_map)
        rag_boundary_graphs.append(rag)

    # Common path prefix of the three output files.
    out_prefix = '%s/%s_%dsp_%dcmpt_%s' % (
        args['out_dir'], args['dataset'], args['n_sp'], args['compactness'], args['split'])

    with open(out_prefix + '.pkl', 'wb') as f:
        pickle.dump((sp_node_labels, sp_data), f, protocol=2)
    with open(out_prefix + '_superpixels.pkl', 'wb') as f:
        pickle.dump(superpixels, f, protocol=2)
    with open(out_prefix + '_rag_boundary_graphs.pkl', 'wb') as f:
        pickle.dump(rag_boundary_graphs, f, protocol=2)

In [None]:
# Extraction settings for this run (SLIC compactness 10); the 'split' entry
# is filled in right before each call to write_superpixels_data.
args = dict(n_sp=500, compactness=10, seed=41, out_dir='.', dataset='VOC')
# Not read anywhere in the visible code.
count_ignored_graphs = 0

# Load both splits into memory and report how long that took.
t0 = time.time()
print("[I] Reading and loading SBD_VOC Images and Masks..")
train_set = VOC_SBD_Images('train')
val_set = VOC_SBD_Images('val')
print("[I] Time taken: {:.4f}s".format(time.time()-t0))

# Restart the timer for the extraction phase.
t0 = time.time()
print("[I] Extracting and writing superpixels data..")

# TRAIN SET
args.update(split='train')
write_superpixels_data(args)

# VAL SET
args.update(split='val')
write_superpixels_data(args)

print("[I] Time taken: {:.4f}s".format(time.time()-t0))

In [None]:
# Extraction settings for this run (SLIC compactness 30); the 'split' entry
# is filled in right before each call to write_superpixels_data.
args = dict(n_sp=500, compactness=30, seed=41, out_dir='.', dataset='VOC')
# Not read anywhere in the visible code.
count_ignored_graphs = 0

# Load both splits into memory and report how long that took.
t0 = time.time()
print("[I] Reading and loading SBD_VOC Images and Masks..")
train_set = VOC_SBD_Images('train')
val_set = VOC_SBD_Images('val')
print("[I] Time taken: {:.4f}s".format(time.time()-t0))

# Restart the timer for the extraction phase.
t0 = time.time()
print("[I] Extracting and writing superpixels data..")

# TRAIN SET
args.update(split='train')
write_superpixels_data(args)

# VAL SET
args.update(split='val')
write_superpixels_data(args)

print("[I] Time taken: {:.4f}s".format(time.time()-t0))

In [None]:
# Plot superpixels with the boundaries overlaid on the image
# plt.imshow(mark_boundaries(img, superpixels))