In [55]:
from IPython.core.display import clear_output
import torch

# Fetch the FCN-ResNet101 segmentation network from the torchvision hub repo
# (pinned to tag v0.10.0) and put it in inference mode. `Module.eval()` returns
# the module itself, so the call can be chained onto the load.
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'fcn_resnet101',
    pretrained=True,
).eval()

# Wipe whatever the load step printed from the cell output.
clear_output()

In [2]:
# `import urllib.request` (rather than a bare `import urllib`) guarantees the
# `request` submodule is actually loaded; it also binds the name `urllib`.
import urllib.request

# Download the class-name list (one label per line) next to the notebook.
# The former `urllib.URLopener()` attempt was a Python 2 API that always failed
# on Python 3, and its bare `except:` hid that failure; call the Python 3
# function directly so genuine download errors surface.
url, filename = ("https://raw.githubusercontent.com/onnx/models/master/vision/object_detection_segmentation/fcn/dependencies/voc_classes.txt", "voc_classes.txt")
urllib.request.urlretrieve(url, filename)

In [17]:
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2 as cv

In [137]:
# Read the class names, one per line, stripping surrounding whitespace.
# The position of a name in this list is used as its class index by
# get_mask_by_tag. The (misspelled) global name `clasess` is kept because
# other cells reference it.
# The `with` block closes the file handle, which the bare open() never did.
with open("voc_classes.txt", 'r') as classes_file:
  clasess = [c.strip() for c in classes_file]

In [56]:
# sample execution (requires torchvision)
from PIL import Image
from torchvision import datasets, transforms

# Per-channel normalisation applied after the image has become a tensor.
# NOTE(review): these mean/std values look like the ImageNet statistics the
# torchvision pretrained models are documented to expect - confirm.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Full preprocessing pipeline: image -> tensor -> normalised tensor.
transform = transforms.Compose([transforms.ToTensor(), normalize])

In [57]:
def preprocess(input_image):
  """Turn an image into a single-item batch for the model.

  The image is converted to RGB, passed through the module-level `transform`
  pipeline, and given a leading dimension of size 1 via `unsqueeze(0)`.

  Args:
    input_image: object with a `.convert("RGB")` method (the notebook passes
      images opened with `PIL.Image.open`).

  Returns:
    The transformed tensor with one extra leading dimension.
  """
  rgb_image = input_image.convert("RGB")
  return transform(rgb_image).unsqueeze(0)

def infer(input_batch):
  """Run the module-level `model` on a batch and return a label map.

  Only the first item of the model's 'out' entry is used; the result is the
  argmax over that item's dimension 0.

  Args:
    input_batch: tensor accepted by `model` (see `preprocess`).

  Returns:
    Tensor of argmax indices for the first batch item.
  """
  # Gradients are not needed for inference, so disable autograd tracking.
  with torch.no_grad():
    first_item_scores = model(input_batch)['out'][0]
  return first_item_scores.argmax(0)

In [58]:
def get_mask_by_tag(output, target_class):
  """Build a 0/255 mask of the pixels labelled with `target_class`.

  Args:
    output: tensor of class indices (as returned by `infer`); converted with
      `.numpy()`.
    target_class: class name; looked up in the module-level `clasess` list to
      get its index.

  Returns:
    NumPy integer array shaped like `output`: 255 where the label equals the
    class index, 0 elsewhere.

  Raises:
    ValueError: if `target_class` is not in `clasess` (from `list.index`).
  """
  class_index = clasess.index(target_class)
  label_map = output.numpy()
  # Boolean comparison scaled to 0/255.
  return (label_map == class_index) * 255

In [None]:
# Unpack the uploaded dataset archive into the working directory.
# NOTE(review): presumably this creates the test-dataset/ folder (images plus
# tags.json) that the following cells read - confirm against the archive.
!unzip test-dataset-20220228T123506Z-001.zip

In [59]:
import json

# Load the per-image tag lists. The batch loop indexes this dict by image file
# name and passes each tag to get_mask_by_tag as a class name.
# The `with` block closes the file handle, which the bare open() never did.
with open('test-dataset/tags.json') as f:
  tags_dict = json.load(f)

In [None]:
# Segment every picture in test-dataset/ and write two files per picture into
# fcn-segmented/: the binary mask of its tagged classes and an overlay of that
# mask on the original image.

# Neither Image.save nor savefig creates missing directories, so make sure the
# output folder exists before the loop starts.
os.makedirs("fcn-segmented", exist_ok=True)

test_pictures = [p for p in os.listdir('test-dataset') if p.split('.')[-1] in ['png', 'jpg', 'jpeg']]
for image in test_pictures:
  input_image = Image.open(os.path.join("test-dataset", image))
  input_batch = preprocess(input_image)
  output_image = infer(input_batch)

  # Accumulate the 0/255 masks of every class tagged for this picture.
  tags = tags_dict[image]
  bin_mask = np.zeros(output_image.shape, dtype=np.int32)
  for tag in tags:
    bin_mask += get_mask_by_tag(output_image, tag)

  # Output files are named after the part of the file name before the first dot.
  stem = image.split('.')[0]

  im = Image.fromarray(bin_mask).convert('RGB')
  im.save("fcn-segmented/{}-segmentation.png".format(stem))

  # Draw each overlay on its own figure. Reusing the implicit pyplot axes would
  # stack every picture on top of the previous ones, and the figure would never
  # be released.
  fig, ax = plt.subplots()
  ax.imshow(input_image)
  ax.imshow(bin_mask, alpha=0.7)
  ax.axis('off')
  fig.savefig("fcn-segmented/{}-overlay.png".format(stem))
  plt.close(fig)  # free the figure; the saved PNG is the result
  

In [None]:
# Bundle everything under fcn-segmented/ (recursively) into fcn-segmented.zip.
!zip -r fcn-segmented.zip fcn-segmented/

In [256]:
import time

# Time 100 back-to-back inferences on one picture. Preprocessing is done once,
# outside the timed region, so only the model call plus argmax is measured.
# The path is relative, like every other cell that reads test-dataset/, rather
# than hardcoding the /content working directory.
input_image = Image.open(os.path.join('test-dataset', 'adventures-autumn-beautiful-blonde.jpg'))
input_batch = preprocess(input_image)

# perf_counter() is the monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
for run_idx in range(100):  # the cells below divide by this same 100
  output_image = infer(input_batch)

end = time.perf_counter()

# Total elapsed seconds for all 100 runs.
print(end - start)

1494.1132249832153


In [264]:
# Size of the benchmark picture; PIL reports it as (width, height).
input_image.size

(910, 607)

In [257]:
# Mean seconds per inference across the 100 timed runs.
elapsed = end - start
elapsed / 100

14.941132249832153

In [263]:
# Throughput is the reciprocal of the mean per-inference time over the 100 runs.
mean_latency = (end - start) / 100
print("FPS:", 1 / mean_latency)

FPS: 0.06692933194612703


In [258]:
# Print the host CPU details so the timings above can be read in context
# (reads /proc/cpuinfo, so this only works on Linux hosts).
!cat /proc/cpuinfo

processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 79
model name	: Intel(R) Xeon(R) CPU @ 2.20GHz
stepping	: 0
microcode	: 0x1
cpu MHz		: 2200.216
cache size	: 56320 KB
physical id	: 0
siblings	: 2
core id		: 0
cpu cores	: 1
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 13
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx smap xsaveopt arat md_clear arch_capabilities
bugs		: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa
bogomips	: 4400.43
clflush size	: 64
cache_alignment	: 64
address sizes	: 46 bits physical, 48 b