In [2]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from helpers import *
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from icecream import ic

In [3]:
data = np.genfromtxt('data.txt', delimiter=' ')
# reshape set up hands to be normalized
data = data.reshape((-1, 21, data.shape[1]))
print(data)
# np.random.shuffle(data)
# print(data.shape)
data = torch.from_numpy(data)
# data = data[torch.randperm(data.size()[0])] # shuffles train_dataset

[[[  0. 503. 389.   0.]
  [  1. 436. 368.   0.]
  [  2. 379. 316.   0.]
  ...
  [ 18. 550. 215.   0.]
  [ 19. 538. 261.   0.]
  [ 20. 527. 297.   0.]]

 [[  0. 505. 387.   0.]
  [  1. 438. 366.   0.]
  [  2. 379. 316.   0.]
  ...
  [ 18. 548. 214.   0.]
  [ 19. 536. 259.   0.]
  [ 20. 527. 296.   0.]]

 [[  0. 502. 388.   0.]
  [  1. 435. 365.   0.]
  [  2. 378. 314.   0.]
  ...
  [ 18. 547. 214.   0.]
  [ 19. 535. 258.   0.]
  [ 20. 526. 294.   0.]]

 ...

 [[  0. 467. 340.   5.]
  [  1. 422. 326.   5.]
  [  2. 386. 297.   5.]
  ...
  [ 18. 539. 212.   5.]
  [ 19. 558. 191.   5.]
  [ 20. 574. 172.   5.]]

 [[  0. 466. 340.   5.]
  [  1. 421. 326.   5.]
  [  2. 385. 297.   5.]
  ...
  [ 18. 539. 211.   5.]
  [ 19. 558. 191.   5.]
  [ 20. 573. 172.   5.]]

 [[  0. 466. 340.   5.]
  [  1. 421. 326.   5.]
  [  2. 385. 297.   5.]
  ...
  [ 18. 539. 211.   5.]
  [ 19. 558. 191.   5.]
  [ 20. 573. 172.   5.]]]


In [4]:
X, y = data[..., :-1], data[..., -1]
# inspect distribution of data, if unbalanced then balance
for fingers_count in range(6):
    ic(y[y == fingers_count].shape[0])

ic| y[y == fingers_count].shape[0]: 37653
ic| y[y == fingers_count].shape[0]: 39774
ic| y[y == fingers_count].shape[0]: 39774
ic| y[y == fingers_count].shape[0]: 39732
ic| y[y == fingers_count].shape[0]: 39606
ic| y[y == fingers_count].shape[0]: 39753


In [5]:
# Balance data
y_shapes = []
temp_y = y[:, 0]
unbal_indices = []
for fingers_count in torch.arange(np.unique(temp_y).shape[0]):
    unbal_indices.append(np.where(temp_y == fingers_count)[0])
    shape_ = unbal_indices[fingers_count].shape[0]
    y_shapes.append(shape_)

ic(y_shapes)
bal_indices = []
min_shape = min(y_shapes)
for fingers_count in range(len(unbal_indices)):
    bal_indices.append(unbal_indices[fingers_count][0:min_shape])

# ic(bal_indices)
bal_indices = np.array(bal_indices).flatten()
ic(bal_indices.shape)
y_new = y[bal_indices]
X_new = X[bal_indices]
ic(X_new.shape)
ic(y_new.shape)

ic| y_shapes: [1793, 1894, 1894, 1892, 1886, 1893]
ic| bal_indices.shape: (10758,)
ic| X_new.shape: torch.Size([10758, 21, 3])
ic| y_new.shape: torch.Size([10758, 21])


torch.Size([10758, 21])

In [6]:
X = X_new
y = y_new

for fingers_count in range(6):
    ic(y[y == fingers_count].shape[0])

ic| y[y == fingers_count].shape[0]: 37653
ic| y[y == fingers_count].shape[0]: 37653
ic| y[y == fingers_count].shape[0]: 37653
ic| y[y == fingers_count].shape[0]: 37653
ic| y[y == fingers_count].shape[0]: 37653
ic| y[y == fingers_count].shape[0]: 37653


In [7]:
# Normalize each hand relative to itself. Removes dependency of hand positioning in camera field of view
for i, hand in enumerate(X):
    X[i] = normalize_hand(hand)

In [8]:
class HandDataset(Dataset):
    def __init__(self, data):
        self.x, self.y = data[..., :-1], data[..., -1]
        self.n_samples = y.shape[0]

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        ic(self.x.shape, self.y.shape)
        reshaped_y = self.y.reshape(-1, 21, 1)
        ic(reshaped_y.shape)
        return torch.cat([self.x[index], reshaped_y[index]], dim=-1)

    def split(self, test_size=.2):
        split = int(self.x.shape[0] * (1 -test_size))
        return self[:split, :], self[split:, :]  # train, test

# Combine X and y to preserve data during shuffle
reshaped_y = y.reshape(-1, 21, 1)
data = torch.cat([X, reshaped_y], dim=-1)

# Shuffle
dataset = HandDataset(data[torch.randperm(data.shape[0])]) # shuffles train_dataset
train_dataset, test_dataset = dataset.split()

# hyper parameters
### DEFINED IN helpers.py

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
# for i in range(len(train_loader)):
#     print(train_loader.dataset[i, 0, 3].item())
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

ic| self.x.shape: torch.Size([10758, 21, 3])
    self.y.shape: torch.Size([10758, 21])
ic| reshaped_y.shape: torch.Size([10758, 21, 1])
ic| self.x.shape: torch.Size([10758, 21, 3])
    self.y.shape: torch.Size([10758, 21])
ic| reshaped_y.shape: torch.Size([10758, 21, 1])


In [9]:
ic(len(train_dataset))
ic(len(train_loader))
ic(len(test_dataset))
ic(len(test_loader))

ic| len(train_dataset): 8606
ic| len(train_loader): 87
ic| len(test_dataset): 2152
ic| len(test_loader): 22


22

In [10]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.LeakyReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        return out


model = NeuralNet(ic(input_size), ic(hidden_size), ic(num_classes))

# loss and optimization
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def normalize_hands(hands):
    for i, hand in enumerate(hands):
        hands[i] = torch.from_numpy(normalize_hand(hand.np()).astype(np.float32()))
    return hands

# training loop
n_total_steps = len(train_loader)
sc = StandardScaler()
for epoch in range(num_epochs):
    for i, sample in tqdm(enumerate(train_loader)):
        hands, labels = sample[..., :-1].float(), sample[..., -1][:, 0].long()

        # hands = normalize_hands(hands).float()
        hands = hands.reshape(-1, input_size)

        # forward pass
        outputs = model(hands).float()
        loss = criterion(outputs, labels)

        # backward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if i + 1 == len(train_loader):
            print(f'epoch {epoch + 1}/{num_epochs}, loss = {loss.item():.4f}')



ic| input_size: 63
ic| hidden_size: 50
ic| num_classes: 6
87it [00:00, 140.46it/s]


epoch 1/3, loss = 0.2476


87it [00:00, 632.12it/s]


epoch 2/3, loss = 0.0751


87it [00:00, 665.89it/s]

epoch 3/3, loss = 0.0417





In [11]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    i = 1
    for sample in test_loader:
        hands, labels = sample[..., :-1].float(), sample[..., -1][:, 0].long()

        # hands = normalize_hands(hands).float()
        hands = hands.reshape(-1, input_size)

        outputs = model(hands)

        # value, index
        _, pred = torch.max(outputs, 1)

        ic(i, labels.shape[0], (pred == labels).sum().item())
        i += 1
        n_samples += labels.shape[0]
        n_correct += (pred == labels).sum().item()

    acc = 100 * n_correct / n_samples

    print('accuracy =', acc)

ic| i: 1, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 2, labels.shape[0]: 100, (pred == labels).sum().item(): 97
ic| i: 3, labels.shape[0]: 100, (pred == labels).sum().item(): 97
ic| i: 4, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 5, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 6, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 7, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 8, labels.shape[0]: 100, (pred == labels).sum().item(): 98
ic| i: 9, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 10, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 11, labels.shape[0]: 100, (pred == labels).sum().item(): 99
ic| i: 12, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 13, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 14, labels.shape[0]: 100, (pred == labels).sum().item(): 99
ic| i: 15, labels.shape[0]: 100, (pred == labels).sum().item()

accuracy = 99.44237918215613


In [12]:
save_model(model.state_dict(), 'finalized_model.pth')