In [6]:
import requests
import json
import torch
import glob
import os
import numpy as np
import shutil
import argparse
import logging
import pandas as pd
from PIL import Image, UnidentifiedImageError
from torchvision import transforms
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
import warnings
import time
from tqdm import tqdm
from collections import Counter
from torch.cuda import OutOfMemoryError

In [2]:
# transform = transforms.Compose([
#     transforms.Resize((120, 120))])

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NUM_CLASSES = 65
CLS_DICT = {'0': 'Bird',
 '1': 'Ground-Animal',
 '2': 'Curb',
 '3': 'Fence',
 '4': 'Guard-Rail',
 '5': 'Barrier',
 '6': 'Wall',
 '7': 'Bike-Lane',
 '8': 'Crosswalk---Plain',
 '9': 'Curb-Cut',
 '10': 'Parking',
 '11': 'Pedestrian-Area',
 '12': 'Rail-Track',
 '13': 'Road',
 '14': 'Service-Lane',
 '15': 'Sidewalk',
 '16': 'Bridge',
 '17': 'Building',
 '18': 'Tunnel',
 '19': 'Person',
 '20': 'Bicyclist',
 '21': 'Motorcyclist',
 '22': 'Other-Rider',
 '23': 'Lane-Marking---Crosswalk',
 '24': 'Lane-Marking---General',
 '25': 'Mountain',
 '26': 'Sand',
 '27': 'Sky',
 '28': 'Snow',
 '29': 'Terrain',
 '30': 'Vegetation',
 '31': 'Water',
 '32': 'Banner',
 '33': 'Bench',
 '34': 'Bike-Rack',
 '35': 'Billboard',
 '36': 'Catch-Basin',
 '37': 'CCTV-Camera',
 '38': 'Fire-Hydrant',
 '39': 'Junction-Box',
 '40': 'Mailbox',
 '41': 'Manhole',
 '42': 'Phone-Booth',
 '43': 'Pothole',
 '44': 'Street-Light',
 '45': 'Pole',
 '46': 'Traffic-Sign-Frame',
 '47': 'Utility-Pole',
 '48': 'Traffic-Light',
 '49': 'Traffic-Sign-(Back)',
 '50': 'Traffic-Sign-(Front)',
 '51': 'Trash-Can',
 '52': 'Bicycle',
 '53': 'Boat',
 '54': 'Bus',
 '55': 'Car',
 '56': 'Caravan',
 '57': 'Motorcycle',
 '58': 'On-Rails',
 '59': 'Other-Vehicle',
 '60': 'Trailer',
 '61': 'Truck',
 '62': 'Wheeled-Slow',
 '63': 'Car-Mount',
 '64': 'Ego-Vehicle'}

# Get helper function
def addIndice(output_max):
    set_of_pixels = torch.unique(output_max, return_counts=True)
    set_dictionary = {}
    for i in range(NUM_CLASSES):
            set_dictionary[str(i)] = 0
    for pixel,count in zip(set_of_pixels[0], set_of_pixels[1]):
        set_dictionary[str(pixel.item())] = count.item()
    set_dictionary['Total'] = int(np.sum(list(set_dictionary.values())))
    return set_dictionary

def addInstance(output_max):
    list_unique, list_counts = torch.unique(out[0]['segmentation'].int(), return_counts=True)

    if -1 in list_unique:
        list_unique = list_unique[1:]
        list_counts = list_counts[1:]

    total = torch.sum(list_counts).item()

    matching_dict = {}
    for i, k in zip(range(len(out[0]['segments_info'])), out[0]['segments_info']):
        matching_dict[i] = int(k['label_id'])

    set_dictionary = {}
    for i in range(NUM_CLASSES):
                set_dictionary[str(i)] = 0

    for i, k in zip(list_unique, list_counts):
        set_dictionary[str(matching_dict[i.item()])] += k.item()
        
    set_dictionary['Total'] = total

    return set_dictionary

def addInstanceCounts(output_max):

    instance_dictionary = {}
    
    instance_dictionary = {}
    for i in range(NUM_CLASSES):
                instance_dictionary[str(i)] = 0
    
    # for each segment, draw its legend
    for segment in out[0]['segments_info']:
        segment_id = segment['id']
        segment_label_id = str(segment['label_id'])
        instance_dictionary[segment_label_id] += 1

    return instance_dictionary

# Load Mask2Former
processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-large-mapillary-vistas-panoptic")
model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-large-mapillary-vistas-panoptic")
model = model.to(device)

In [4]:
print(device)

cuda


In [5]:
# Set start state
image_indicators_dict = {}
image_instances_dict = {}

# Create output folder if none exist
if not os.path.exists(f'./outputs'):
    os.makedirs(f'./outputs')

# Get list of images
image_set = [i for i in os.listdir(os.path.join(os.getcwd(),'/media/ual/UAL-PSSD-1/global_streetscapes_imgs_1/'))]

In [6]:
len(image_set)

1677722

In [7]:
for i, image in enumerate(tqdm(image_set[0:])):
    print(f'Segmenting {image}')
    img = Image.open(os.path.join(os.getcwd(),'/media/ual/UAL-PSSD-1/global_streetscapes_imgs_1/', f'{image}'))
    inputs = processor(images=img, return_tensors="pt", height = 120, width=120)
    with torch.no_grad():
        pixel_values = inputs['pixel_values'].to(device)
        pixel_mask = inputs['pixel_mask'].to(device)
        outputs = model(pixel_values = pixel_values, pixel_mask = pixel_mask)
        outputs['class_queries_logits'] = outputs['class_queries_logits'].to('cpu')
        outputs['masks_queries_logits'] = outputs['masks_queries_logits'].to('cpu')
        outputs['encoder_last_hidden_state'] = outputs['encoder_last_hidden_state'].to('cpu')
        outputs['pixel_decoder_last_hidden_state'] = outputs['pixel_decoder_last_hidden_state'].to('cpu')
        outputs['transformer_decoder_last_hidden_state'] = outputs['transformer_decoder_last_hidden_state'].to('cpu')
    out = processor.post_process_instance_segmentation(outputs, target_sizes=[img.size[::-1]], threshold=0.25)
    image_indicators_dict[image] = addInstance(out)
    image_instances_dict[image] = addInstanceCounts(out)

  0%|                                                                                                                                                                          | 0/1677722 [00:00<?, ?it/s]

Segmenting 278f7c8a-3063-4acd-ae93-4c6ff848c8d3.jpeg


  return F.conv2d(input, weight, bias, self.stride,
  0%|                                                                                                                                                             | 1/1677722 [00:04<1893:00:39,  4.06s/it]

Segmenting b6a4d5a1-dde2-40c9-9c55-bd6fb3431bef.jpeg


  0%|                                                                                                                                                             | 2/1677722 [00:07<1620:57:37,  3.48s/it]

Segmenting c502f3d8-0c08-407f-b2e1-14f3d5de49b6.jpeg


  0%|                                                                                                                                                             | 3/1677722 [00:10<1495:58:00,  3.21s/it]

Segmenting 77536944-fd4e-4f0d-a724-a2d54f47b9a9.jpeg


  0%|                                                                                                                                                             | 4/1677722 [00:13<1526:58:00,  3.28s/it]

Segmenting fb8768bc-d9f6-4618-8dcb-7442a5337a5d.jpeg


  0%|                                                                                                                                                             | 5/1677722 [00:16<1473:39:06,  3.16s/it]

Segmenting 5cff3614-fbd7-4f9e-a36b-2e6acc35a6b2.jpeg


  0%|                                                                                                                                                             | 6/1677722 [00:19<1422:06:17,  3.05s/it]

Segmenting 507ff643-59a9-4f2c-97c8-5a8e4b3cc77c.jpeg


  0%|                                                                                                                                                             | 7/1677722 [00:22<1472:00:50,  3.16s/it]

Segmenting 78bb3185-5157-43a8-8e42-971c2da46186.jpeg


  0%|                                                                                                                                                             | 8/1677722 [00:25<1503:46:39,  3.23s/it]

Segmenting 27413128-043e-4f89-b135-0c2249677d1b.jpeg


  0%|                                                                                                                                                             | 9/1677722 [00:28<1455:15:40,  3.12s/it]

Segmenting d8494c7a-eb8f-4fea-854e-921512ecd609.jpeg


  0%|                                                                                                                                                            | 10/1677722 [00:31<1418:07:34,  3.04s/it]

Segmenting 1f08f941-1baf-460f-bc0d-672a29325db5.jpeg


  0%|                                                                                                                                                            | 11/1677722 [00:34<1415:11:29,  3.04s/it]

Segmenting 10b32d5f-84d7-486e-9c6b-7fe9b64385b0.jpeg


  0%|                                                                                                                                                            | 12/1677722 [00:37<1388:56:23,  2.98s/it]

Segmenting 6f032d01-f078-4c4c-b2dc-92b8ed5da2f1.jpeg


  0%|                                                                                                                                                            | 12/1677722 [00:38<1477:26:33,  3.17s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 6.18 GiB (GPU 0; 7.92 GiB total capacity; 1.17 GiB already allocated; 5.75 GiB free; 1.24 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
img.size

In [None]:
print(f'Segmenting {image}')
img = Image.open(os.path.join(os.getcwd(),'/media/ual/UAL-PSSD-1/global_streetscapes_imgs_1/', '6f032d01-f078-4c4c-b2dc-92b8ed5da2f1.jpeg'))
inputs = processor(images=img, return_tensors="pt")
with torch.no_grad():
    pixel_values = inputs['pixel_values'].to(device)
    pixel_mask = inputs['pixel_mask'].to(device)
    outputs = model(pixel_values = pixel_values, pixel_mask = pixel_mask)
    outputs['class_queries_logits'] = outputs['class_queries_logits'].to('cpu')
    outputs['masks_queries_logits'] = outputs['masks_queries_logits'].to('cpu')
    outputs['encoder_last_hidden_state'] = outputs['encoder_last_hidden_state'].to('cpu')
    outputs['pixel_decoder_last_hidden_state'] = outputs['pixel_decoder_last_hidden_state'].to('cpu')
    outputs['transformer_decoder_last_hidden_state'] = outputs['transformer_decoder_last_hidden_state'].to('cpu')
out = processor.post_process_instance_segmentation(outputs, target_sizes=[img.size[::-1]], threshold=0.25)
image_indicators_dict[image] = addInstance(out)
image_instances_dict[image] = addInstanceCounts(out)

In [None]:
outputs[0] = outputs[0].to('cpu')

In [None]:
transform(img)

In [None]:
img

In [None]:
df = pd.DataFrame.from_dict(image_instances_dict, orient='index')

In [None]:
image_instances_dict['139784358078458.jpg']

In [None]:
img = Image.open(os.path.join(os.getcwd(),'test/', '139784358078458.jpg'))

In [None]:
img

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import cm

In [None]:
plt.imshow(out[0]['segmentation'])

### Merge CSV

In [None]:
from glob import glob
import pandas as pd

In [None]:
paths[0]

In [None]:
out2 = pd.read_csv(paths[1])

In [None]:
total = pd.DataFrame()

In [None]:
total = pd.concat([total, out2], axis=0)

In [None]:
def merge_csv(filepath):

    extension = filepath + '*.csv'
    paths = glob(extension)
    
    combined = pd.DataFrame()
    for path in paths:
        temp = pd.read_csv(path)
        combined = pd.concat([combined, temp], axis=0)
    
    return combined

In [None]:
all_df = merge_csv('./outputs/')

In [None]:
len(all_df)

In [None]:
all_df.to_csv('./final/ssd1_1676060.csv')

In [None]:
# Closing log file

with open('yourlog.log', 'w'):
    pass
