In [None]:
import json
import os
import sys

import pandas as pd
import torch
from PIL import Image
from torchvision import transforms
from torchvision.models import resnet18

In [None]:
# Preprocessing applied to a PIL image before it is fed to the network:
# resize to 256, centre-crop to 224, convert to a tensor, then normalise each
# channel. The mean/std values match the ImageNet statistics documented for
# torchvision's pretrained classification models.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# The 80 tag names, with '+' standing in for spaces. The order is significant:
# later cells iterate over this list to create one count column per tag.
labels = [
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic+light",
    "fire+hydrant", "stop+sign", "parking+meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports+ball", "kite", "baseball+bat",
    "baseball+glove", "skateboard", "surfboard", "tennis+racket", "bottle",
    "wine+glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot+dog", "pizza", "donut",
    "cake", "chair", "couch", "potted+plant", "bed",
    "dining+table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell+phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy+bear", "hair+drier", "toothbrush",
]

In [None]:
def get_model(out_features = 1, mode = 'tune', parent = 'pretrained', randomize = False):
    """Build a ResNet-18 whose final layer is replaced by a new linear head.

    Args:
        out_features: Number of outputs of the replacement ``fc`` layer.
        mode: How the model is going to be used.
            ``'tune'``: every parameter stays trainable.
            ``'transfer'``: everything is frozen except the ``fc`` layer.
            ``'eval'``: everything is frozen and the model is put in eval mode.
        parent: ``'pretrained'`` to start from torchvision's pretrained
            weights. Any other value is treated as the path of a saved state
            dict, which is loaded onto the CPU *after* the head has been
            replaced, so the checkpoint's ``fc`` is expected to have
            ``out_features`` outputs.
        randomize: If true, replace the ``fc`` layer with a freshly
            initialised one after the weights have been loaded.

    Returns:
        ``(model, parameters_to_optimise)`` for ``'tune'`` and ``'transfer'``;
        the model alone for ``'eval'``.

    Raises:
        ValueError: If ``mode`` is not one of the three values listed above.
    """
    # Reject a bad mode up front, before any weights are downloaded or read
    # from disk, and with an exception instead of a success-coded sys.exit(0).
    if mode not in ('tune', 'transfer', 'eval'):
        raise ValueError(
            "get_model: could not determine trainable parameters for mode "
            f"{mode!r}; expected 'tune', 'transfer' or 'eval'"
        )

    # Load the backbone (with torchvision's weights only when no checkpoint is given)
    model = resnet18(pretrained = (parent == 'pretrained'))
    # Change the classification layer
    model.fc = torch.nn.Linear(in_features = 512, out_features = out_features)
    # Load in the parent model weights
    if parent != 'pretrained':
        # NOTE: torch.load unpickles the file; only use trusted checkpoints.
        model.load_state_dict(torch.load(parent, map_location=torch.device('cpu')))
    if randomize:
        # Discard whatever head was just loaded and start it from scratch.
        model.fc = torch.nn.Linear(in_features = 512, out_features = out_features)

    # Setup the trainable parameters
    if mode == 'tune':
        return model, model.parameters()

    if mode == 'transfer':
        for param in model.parameters():
            param.requires_grad = False
        model.fc.weight.requires_grad = True
        model.fc.bias.requires_grad = True
        return model, model.fc.parameters()

    # mode == 'eval'
    for param in model.parameters():
        param.requires_grad = False
    model.eval()
    return model

In [None]:
# Restore the 80-output checkpoint in 'eval' mode: get_model freezes every
# parameter, calls .eval() and returns the model alone (no parameter list).
m = get_model(
    parent='../../../model.pt',
    mode='eval',
    out_features=80,
)

In [None]:
# One validation image used to smoke-test the model. The dataset root can be
# overridden with the COCO_DIR environment variable; the default is the
# location used when the notebook was written.
img = Image.open(
    os.path.join(
        os.environ.get("COCO_DIR", "/Users/acabrera/dev/data/coco-2017"),
        "val2017",
        "000000015335.jpg",
    )
)

In [None]:
# Render the image inline as the cell's output. (Image.show() would instead
# launch an external viewer, which leaves nothing in the saved notebook and
# does nothing useful on a remote or headless kernel.)
img

In [None]:
# Force three colour channels before preprocessing: not every image file is
# RGB (COCO includes some greyscale images), and both the 3-channel Normalize
# step and the network input need exactly three channels. Pixel data of
# images that are already RGB is unchanged.
img_t = preprocess(img.convert("RGB"))

In [None]:
# Forward pass on a batch of one: add a leading batch axis to the image
# tensor and show the output of the model's final linear layer.
m(torch.unsqueeze(img_t, dim=0))

In [None]:
# Locate the annotation file. The dataset root can be overridden with the
# COCO_DIR environment variable; the default is the location used when the
# notebook was written.
annotations_path = os.path.join(
    os.environ.get("COCO_DIR", "/Users/acabrera/dev/data/coco-2017"),
    "instances_val2017.json",
)
# JSON is UTF-8; say so explicitly so the read does not depend on the
# platform's default text encoding.
with open(annotations_path, encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Top-level sections of the annotation file; later cells read 'images',
# 'annotations' and 'categories'.
data.keys()

In [None]:
# Quick look at the schema: number of images, then one sample record from
# each of the three sections used below.
(
    len(data['images']),
    data['images'][0],
    data['annotations'][0],
    data['categories'][1],
)

In [None]:
# One row per image record, indexed by the image's 'id' so that annotations
# can address rows directly through their 'image_id'.
df = pd.DataFrame(data['images']).set_index('id')

In [None]:
# COCO category ids are not contiguous (80 categories spread over ids 1-90),
# so `category_id - 1` is NOT a valid position in `labels`. Resolve ids through
# the category table instead: each entry of `labels` is a category name with
# spaces replaced by '+'.
known_labels = set(labels)
label_for_id = {}
for cat in data['categories']:
    tag = cat['name'].replace(' ', '+')
    if tag in known_labels:
        label_for_id[cat['id']] = tag

# Fail loudly rather than silently writing all-zero columns if the label
# vocabulary and the annotation file disagree.
unmatched = known_labels.difference(label_for_id.values())
if unmatched:
    raise ValueError(f"labels with no matching category: {sorted(unmatched)}")

# 'label' is a bracketed, comma-separated summary of the tags present on the
# image; images without any annotation keep the well-formed empty list '[]'.
df['label'] = '[]'
# One instance-count column per tag.
for c in labels:
    df[c] = 0

tags_by_image = {}
for ann in data['annotations']:
    tag = label_for_id.get(ann['category_id'])
    if tag is None:
        # Category outside the tag vocabulary: ignore the annotation.
        continue
    image_id = ann['image_id']
    seen = df.at[image_id, tag]
    if seen == 0:
        # First instance of this tag on this image: list it once, in
        # first-seen order.
        tags_by_image.setdefault(image_id, []).append(tag)
    df.at[image_id, tag] = seen + 1

for image_id, tags in tags_by_image.items():
    df.at[image_id, 'label'] = '[' + ','.join(tags) + ']'

In [None]:
# Inspect the bracketed, comma-separated tag string built for each image.
df.loc[:, 'label']

In [None]:
# Inspect the full table: image metadata, the 'label' summary column and one
# instance-count column per tag.
df

In [None]:
# Write the tagging table to the working directory; the image-id index is
# kept as the first column of the CSV.
df.to_csv(path_or_buf='coco-tagging.csv', index=True)