In [None]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from skimage import io
import re
import torch, torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import tqdm

In [None]:
# Fetch the UT-Zappos50K image archive. -nc (no-clobber) skips the download
# when the zip is already present, so re-running the notebook is cheap.
!wget -nc http://vision.cs.utexas.edu/projects/finegrained/utzap50k/ut-zap50k-images.zip
# -q hides the per-file listing; -n never overwrites files that were already
# extracted, so the cell does not prompt or redo work on a second run.
!unzip -q -n ut-zap50k-images.zip

In [None]:
# Label metadata used by get_label (looked up through its "CID" column).
# The file must be present in the working directory; link kept from the
# original notebook:
# https://drive.google.com/file/d/1yU9MA5pbGx8ScsnQ0PoqQp4KgeEpvfqL/view?usp=sharing
meta = pd.read_csv(filepath_or_buffer="meta-data.csv")
print("Labels file found")
meta.head(n=5)

Labels file found


Unnamed: 0,CID,Category,SubCategory,HeelHeight,Insole,Closure,Gender,Material,ToeStyle
0,100627-72,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe
1,100627-255,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe
2,100657-72,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe
3,100657-216,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe
4,101026-3,Boots,Mid-Calf,1in - 1 3/4in,Leather;Padded,Pull-on,Men,Leather;Rubber,Square Toe;Closed Toe


In [None]:
import os
from collections import defaultdict
import random

folder = "ut-zap50k-images"
SAMPLE_SEED = 0  # fixed seed so the same image subset is drawn on every run


def _sorted_subdirs(path):
  """Return the names of the sub-directories of `path`, sorted."""
  return sorted(name for name in os.listdir(path)
                if os.path.isdir(path + "/" + name))


# Walk <folder>/<category>/<subcategory>/<brand>/<image> and
#   * give every "category, subcategory" pair an integer class index,
#   * keep a random ~60% sample of the image paths,
#   * count the sampled images per class.
# Directory listings are sorted because os.listdir returns entries in
# arbitrary order; without sorting the class indices (and, with a seeded RNG,
# the sampled subset) could differ between runs or machines.
sample_rng = random.Random(SAMPLE_SEED)
classes = {}                 # two-way map: class name -> index and index -> class name
numofpics = defaultdict(int)  # class name -> number of sampled images
img_paths = []               # paths of the sampled images
count = 0
for cat in _sorted_subdirs(folder):
  folder1 = folder + "/" + cat
  for subcat in _sorted_subdirs(folder1):
    folder2 = folder1 + "/" + subcat
    class_name = cat + ", " + subcat
    classes[class_name] = count
    classes[count] = class_name
    count += 1
    for brand in _sorted_subdirs(folder2):
      folder3 = folder2 + "/" + brand
      for pic in sorted(os.listdir(folder3)):
        pic_path = folder3 + "/" + pic
        if not os.path.isfile(pic_path):
          continue
        # randint(1, 10) > 6 is true for 4 of 10 values: drop ~40%, keep ~60%.
        if sample_rng.randint(1, 10) > 6:
          continue
        numofpics[class_name] += 1
        img_paths.append(pic_path)

print(len(img_paths))
numofpics

29988


defaultdict(int,
            {'Boots, Ankle': 3520,
             'Boots, Knee High': 1283,
             'Boots, Mid-Calf': 2812,
             'Boots, Over the Knee': 28,
             'Boots, Prewalker Boots': 2,
             'Sandals, Athletic': 6,
             'Sandals, Flat': 3385,
             'Sandals, Heel': 72,
             'Shoes, Boat Shoes': 395,
             'Shoes, Clogs and Mules': 871,
             'Shoes, Crib Shoes': 12,
             'Shoes, Firstwalker': 216,
             'Shoes, Flats': 2368,
             'Shoes, Heels': 3457,
             'Shoes, Loafers': 1734,
             'Shoes, Oxfords': 1218,
             'Shoes, Prewalker': 148,
             'Shoes, Sneakers and Athletic Shoes': 7674,
             'Slippers, Boot': 13,
             'Slippers, Slipper Flats': 768,
             'Slippers, Slipper Heels': 6})

In [None]:
# Fold every class with fewer than MIN_CLASS_SIZE sampled images into a single
# "Other" class and renumber the result contiguously as 0..K-1.
#
# Cross-entropy targets must lie in [0, C-1] where C is the number of logits.
# The previous hard-coded index 22 (plus the gaps left by the removed classes)
# produced labels outside that range for the 10-output final layer
# nn.Linear(256, 10), and left stale index -> name entries in `classes`.
MIN_CLASS_SIZE = 1000
OTHER = "Other"

# Classes that stay on their own. Sorted so the numbering is deterministic.
# "Other" is excluded so that re-running this cell is idempotent.
kept_names = sorted(
    name for name, n in numofpics.items()
    if name != OTHER and n >= MIN_CLASS_SIZE
)
kept_set = set(kept_names)
label_names = kept_names + [OTHER]   # position in this list == class index
label_index = {name: idx for idx, name in enumerate(label_names)}

merged_counts = defaultdict(int, {name: numofpics[name] for name in kept_names})
# Everything that is not kept (small classes, and an existing "Other" entry on
# a re-run) is accumulated into "Other".
merged_counts[OTHER] = sum(n for name, n in numofpics.items() if name not in kept_set)

# Rebuild the two-way map. Every original class name (the string keys of
# `classes`) resolves either to its own new index or to the "Other" index.
original_names = [key for key in classes if isinstance(key, str) and key != OTHER]
classes = {name: label_index.get(name, label_index[OTHER]) for name in original_names}
classes[OTHER] = label_index[OTHER]
for idx, name in enumerate(label_names):
  classes[idx] = name

numofpics = merged_counts

In [None]:
# Display the per-class image counts after the small classes were merged into "Other".
numofpics

defaultdict(int,
            {'Boots, Ankle': 3520,
             'Boots, Knee High': 1283,
             'Boots, Mid-Calf': 2812,
             'Other': 2537,
             'Sandals, Flat': 3385,
             'Shoes, Flats': 2368,
             'Shoes, Heels': 3457,
             'Shoes, Loafers': 1734,
             'Shoes, Oxfords': 1218,
             'Shoes, Sneakers and Athletic Shoes': 7674})

In [None]:
def get_label(img_path, meta):
  """Return the "Category, SubCategory" label of an image.

  The image file name is expected to start with "<digits>.<digits>"
  (e.g. ".../8022042.89.jpg"); this is turned into the code "<digits>-<digits>"
  and looked up in the "CID" column of `meta`.

  Args:
    img_path (str): "/"-separated path of the image file.
    meta (pandas.DataFrame): table with "CID", "Category" and "SubCategory"
      columns.

  Returns:
    str: "<Category>, <SubCategory>" of the first matching row, or "Other"
    (after printing a diagnostic) when the file name has no code, the code is
    not in `meta`, or one of the two fields is missing.
  """
  # Only inspect the file name: searching the whole path could match a
  # directory whose name starts with digits. The dot is escaped so it matches
  # a literal "." only.
  file_name = img_path.rsplit("/", 1)[-1]
  match = re.match(r"([0-9]+)\.([0-9]+)", file_name)
  if match is None:
    print("Error in label at", img_path)
    return "Other"
  code = match.group(1) + "-" + match.group(2)

  # Select the label columns by name so the result does not depend on the
  # column order of the CSV (e.g. a leading "Unnamed: 0" index column).
  rows = meta.loc[meta['CID'] == code, ['Category', 'SubCategory']]
  if rows.empty or rows.iloc[0].isna().any():
    print("Error in label at", img_path, code, np.array(rows))
    return "Other"
  return ", ".join(map(str, rows.iloc[0]))

In [None]:
class ShoesDataset(Dataset):
  """In-memory dataset of transformed shoe images with integer class labels.

  All images are read and transformed once in the constructor and stacked into
  a single tensor, so indexing afterwards does no file I/O.
  """

  def __init__(self, csv_path, img_paths, transform, image_shape=(102, 136, 3)):
    """
    Args:
        csv_path (string): Csv file with labels.
        img_paths (list): List of all images' path names.
        transform (callable): Applied to every loaded image; must return
            tensors of identical shape so they can be stacked.
        image_shape (tuple): Only images whose array shape equals this are
            kept; all others are skipped silently.

    Raises:
        ValueError: if no image in `img_paths` has shape `image_shape`.

    Labels are resolved through the notebook-level `get_label` function and
    `classes` mapping.
    """
    # Honour the csv_path argument (it used to be ignored in favour of a
    # hard-coded file name).
    self.csv = pd.read_csv(csv_path)
    self.transform = transform
    self.labels = []
    tensors = []
    expected_shape = tuple(image_shape)
    for path in tqdm.tqdm(img_paths, "Loading"):
      img = io.imread(path)
      if img.shape != expected_shape:
        continue
      self.labels.append(classes[get_label(path, self.csv)])
      tensors.append(transform(img))

    if not tensors:
      # torch.stack([]) would fail with a much less helpful message.
      raise ValueError(
          "no image with shape {} found in img_paths".format(expected_shape))
    self.imgs = torch.stack(tensors)

  def __len__(self):
    """Number of images that were kept."""
    return len(self.imgs)

  def __getitem__(self, idx):
    """Return [image(s), label(s)] for an int, slice, list or tensor index."""
    if torch.is_tensor(idx):
      idx = idx.tolist()

    # self.labels is a plain Python list and cannot be indexed with a list of
    # positions (what a 1-D tensor index turns into), so gather those by hand.
    if isinstance(idx, list):
      labels = [self.labels[i] for i in idx]
    else:
      labels = self.labels[idx]
    return [self.imgs[idx], labels]

In [None]:
# Per-image preprocessing: ToTensor, then Normalize with mean 0.5 and std 0.5
# on each of the three channels, i.e. (x - 0.5) / 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Loads every sampled image into memory, then shows the first [image, label] pair.
ds = ShoesDataset(csv_path="meta-data.csv", img_paths=img_paths, transform=transform)
ds[0]

Loading:  82%|████████▏ | 24717/29988 [01:37<00:20, 257.26it/s]

Error in label at ut-zap50k-images/Boots/Mid-Calf/Primigi Kids/8022042.89.jpg 8022042-89 []


Loading: 100%|██████████| 29988/29988 [01:58<00:00, 253.73it/s]


[tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]],
 
         [[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]],
 
         [[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]]]), 0]

In [None]:
import time

# Measure the CPU time of one full pass over the dataset. process_time()
# returns seconds of CPU time since process start, so only the difference
# between two readings is meaningful, and the unit is seconds.
start_cpu = time.process_time()
for i in range(len(ds)):
  ds[i]  # fetch the sample so the pass exercises the dataset
  if i % 5000 == 0:
    print(i)
print(time.process_time() - start_cpu, "s")

0
5000
10000
15000
20000
25000
122.69918855 ms


In [None]:
# 80/20 train/test split. The split is drawn from a seeded generator so the
# same samples end up in each subset on every run.
SPLIT_SEED = 0
train_size = int(0.8 * len(ds))
test_size = len(ds) - train_size  # remainder, so the two sizes always sum to len(ds)
trainset, testset = torch.utils.data.random_split(
    ds, [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED))

# Training batches are reshuffled every epoch; test batches keep a fixed order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=50,
                                          shuffle=True, num_workers=1)
testloader = torch.utils.data.DataLoader(testset, batch_size=50,
                                         shuffle=False, num_workers=1)


In [None]:
class Flatten(nn.Module):
    """Layer that collapses every dimension after the first into a single one."""

    def forward(self, input):
        """Return a (input.size(0), -1) view of `input`."""
        leading = input.shape[0]
        return input.view(leading, -1)

In [None]:
# Small CNN: three conv -> batch-norm -> ReLU -> max-pool stages followed by a
# two-layer classifier head. The layers are listed in order and registered
# under the given names.
layers = [
    # stage 1
    ('conv1', nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)),
    ('bn_conv1', nn.BatchNorm2d(32)),
    ('conv1_relu', nn.ReLU()),
    ('pool1', nn.MaxPool2d(kernel_size=2, stride=1, padding=1)),
    # stage 2
    ('conv2', nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=3)),
    ('bn_conv2', nn.BatchNorm2d(32)),
    ('conv2_relu', nn.ReLU()),
    ('pool2', nn.MaxPool2d(kernel_size=2)),
    # stage 3
    ('conv3', nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=3)),
    ('bn_conv3', nn.BatchNorm2d(32)),
    ('conv3_relu', nn.ReLU()),
    ('pool3', nn.MaxPool2d(kernel_size=2, stride=1, padding=1)),
    # classifier head: 1536 input features, 10 output scores
    ('flat', Flatten()),
    ('dense1', nn.Linear(1536, 256)),
    ('dense1_relu', nn.ReLU()),
    ('dropout0', nn.Dropout(0.3)),
    ('dense2', nn.Linear(256, 10)),
]

model = nn.Sequential()
for layer_name, layer in layers:
    model.add_module(layer_name, layer)

In [None]:
from torchsummary import summary

# Move the model to the GPU, then print the per-layer summary for a
# (3, 102, 136) input.
model = model.cuda()
summary(model, (3, 102, 136))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 100, 134]             896
       BatchNorm2d-2         [-1, 32, 100, 134]              64
              ReLU-3         [-1, 32, 100, 134]               0
         MaxPool2d-4         [-1, 32, 101, 135]               0
            Conv2d-5           [-1, 32, 33, 45]           9,248
       BatchNorm2d-6           [-1, 32, 33, 45]              64
              ReLU-7           [-1, 32, 33, 45]               0
         MaxPool2d-8           [-1, 32, 16, 22]               0
            Conv2d-9             [-1, 32, 5, 7]           9,248
      BatchNorm2d-10             [-1, 32, 5, 7]              64
             ReLU-11             [-1, 32, 5, 7]               0
        MaxPool2d-12             [-1, 32, 6, 8]               0
          Flatten-13                 [-1, 1536]               0
           Linear-14                  [

In [None]:
# Histories appended to by the training loop.
train_loss, val_accuracy = [], []

# Adam over all model parameters with learning rate 0.001.
opt = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def compute_loss(X_batch, y_batch):
    """Return the mean cross-entropy loss of the global `model` on one batch.

    Args:
        X_batch: batch of inputs accepted by `model` (tensor or array-like);
            converted to float32.
        y_batch: integer class indices, one per sample (tensor or sequence);
            converted to int64. Each index must be a valid class index for
            the logits `model` produces.

    Returns:
        0-dim tensor holding the batch-averaged loss, attached to the autograd
        graph so `.backward()` can be called on it.
    """
    # Run on whatever device the model lives on instead of assuming a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    X_batch = torch.as_tensor(X_batch, dtype=torch.float32, device=device)
    y_batch = torch.as_tensor(y_batch, dtype=torch.long, device=device)
    logits = model(X_batch)
    # cross_entropy already averages over the batch (reduction="mean").
    return F.cross_entropy(logits, y_batch)

In [None]:
# Smoke test: evaluate the loss on the first 50 samples of the dataset.
sample_images, sample_labels = ds[0:50]
compute_loss(sample_images, sample_labels)

RuntimeError: ignored

In [None]:
import time
num_epochs = 100 # total amount of full passes over training data
batch_size = 50  # number of samples processed in one SGD iteration (the loaders are built with the same value)

# Validation inputs are moved to the device the model is on.
device = next(model.parameters()).device

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    start_time = time.time()
    model.train(True) # enable dropout / batch_norm training behavior
    for (X_batch, y_batch) in trainloader:
        # train on batch
        opt.zero_grad()
        loss = compute_loss(X_batch, y_batch)
        loss.backward()
        opt.step()
        train_loss.append(loss.item())

    # And a full pass over the held-out data:
    model.train(False) # disable dropout / use averages for batch_norm
    with torch.no_grad(): # no gradients needed for evaluation
        for X_batch, y_batch in testloader:
            logits = model(X_batch.to(device=device, dtype=torch.float32))
            y_pred = logits.argmax(dim=1).cpu()
            val_accuracy.append((y_batch.cpu() == y_pred).float().mean().item())

    # Then we print the results for this epoch. Each history list gets one
    # entry per batch, so the last len(loader) entries belong to this epoch.
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss (in-iteration): \t{:.6f}".format(
        np.mean(train_loss[-len(trainloader):])))
    print("  validation accuracy: \t\t\t{:.2f} %".format(
        np.mean(val_accuracy[-len(testloader):]) * 100))

100
x shape: torch.Size([50, 3, 102, 136]), y shape: torch.Size([50])


RuntimeError: ignored

In [None]:
# Print how many samples carry each label id instead of dumping every label
# (one entry per image).
label_ids, label_counts = np.unique(ds.labels, return_counts=True)
print(dict(zip(label_ids.tolist(), label_counts.tolist())))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 