### build dataset

In [21]:
import glob

import numpy as np
import pandas
import pandas as pd  # alias used by the DataFrame / concat cells below
import scipy.io

In [22]:
# Collect every entry under tools/. glob returns matches in arbitrary,
# platform-dependent order, so sort to keep downstream ordering reproducible.
dir_path = sorted(glob.glob('./part-affordance-dataset/tools/*'))

In [29]:
# Build one multi-hot vector per image and cache it next to the image as
# `<prefix>label.npy`; collect the image paths and the cached label paths.
image_path = []
label_path = []

NUM_CLASSES = 7   # label ids 1..NUM_CLASSES are encoded; id 0 is not
PREFIX_CUT = 7    # presumably strips a trailing 'rgb.jpg' -- confirm against the dataset file names
class_ids = np.arange(1, NUM_CLASSES + 1)

# glob returns paths in arbitrary, platform-dependent order; sorting keeps the
# collected lists (and any index-based split made from them) reproducible.
for d in sorted(dir_path):
    for img in sorted(glob.glob(d + '/*.jpg')):
        prefix = img[:-PREFIX_CUT]
        label = scipy.io.loadmat(prefix + 'label.mat')['gt_label']

        # multi_hot[k] == 1 iff class id k + 1 occurs anywhere in `label`.
        multi_hot = np.isin(class_ids, label).astype(np.int64)

        # Build the output path once so the saved file and the recorded path
        # can never drift apart.
        npy_path = prefix + 'label.npy'
        np.save(npy_path, multi_hot)

        image_path.append(img)
        label_path.append(npy_path)

In [31]:
# Number of image paths collected by the loop above.
len(image_path)

28843

In [34]:
# Number of label paths collected; one is appended per image, so this should
# match len(image_path).
len(label_path)

28843

In [35]:
# Hold out every fifth (image, label) pair, by position, as the test split;
# all remaining pairs form the training split.
pairs = list(zip(image_path, label_path))
held_out = pairs[::5]
kept = [pair for pos, pair in enumerate(pairs) if pos % 5 != 0]

image_train = [img_file for img_file, _ in kept]
image_test = [img_file for img_file, _ in held_out]
label_train = [lab_file for _, lab_file in kept]
label_test = [lab_file for _, lab_file in held_out]

In [37]:
# Two-column table pairing each training image path with its label file path.
train_columns = {'image': image_train, 'label': label_train}
df_train = pd.DataFrame(train_columns, columns=['image', 'label'])

In [38]:
# Two-column table pairing each held-out image path with its label file path.
test_columns = {'image': image_test, 'label': label_test}
df_test = pd.DataFrame(test_columns, columns=['image', 'label'])

In [39]:
# Stack the train rows on top of the test rows. Each frame keeps its own row
# index, so index values repeat in the combined frame.
data = pd.concat(objs=[df_train, df_test], axis=0)

In [40]:
# Persist both splits and their union as CSV files. `index=False` is the
# documented way to omit the row-index column (the parameter is a bool).
for frame, csv_path in (
    (df_train, './part-affordance-dataset/train.csv'),
    (df_test, './part-affordance-dataset/test.csv'),
    (data, './part-affordance-dataset/all_data.csv'),
):
    frame.to_csv(csv_path, index=False)

### define model

In [10]:
import torch.nn as nn
from torchvision import models

In [2]:
# VGG-16 with batch normalisation, loaded with pretrained weights (the weight
# file is downloaded on first use -- see the output below).
vgg = models.vgg16_bn(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /Users/yuchi/.torch/models/vgg16_bn-6c64b313.pth
100%|██████████| 553507836/553507836 [03:05<00:00, 2990907.77it/s]


In [11]:
# Swap the last classifier layer for a 7-output linear head, matching the
# length of the multi-hot label vectors built earlier in this notebook.
vgg.classifier[6] = nn.Linear(4096, 7, bias=True)

In [12]:
# Display the model's module tree.
vgg

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3)

### how to calculate accuracy

In [13]:
import torch

In [22]:
# Toy example: a 7-element prediction vector and an all-zero ground truth of
# the same shape and dtype.
pred = torch.tensor([0, 1, 1, 1, 1, 0, 0])
true = torch.zeros_like(pred)

In [23]:
# Count the positions where prediction and ground truth agree.
(pred == true).sum()

tensor(3)

In [24]:
# In-place increment: every element of `true` goes up by one.
# NOTE: not idempotent -- re-running this cell adds one again.
true += 1

In [25]:
# Show `true` after the in-place increment.
true

tensor([1, 1, 1, 1, 1, 1, 1])