In [1]:
from torch.utils.data import Dataset, DataLoader
from helpers import *
from tqdm import tqdm
from icecream import ic

In [2]:
data = np.genfromtxt('data.txt', delimiter=' ')
# reshape set up hands to be normalized
data = data.reshape((-1, 21, data.shape[1]))
print(data)
# np.random.shuffle(data)
# print(data.shape)
data = torch.from_numpy(data)
# data = data[torch.randperm(data.size()[0])] # shuffles train_dataset

[[[  0. 503. 389.   0.]
  [  1. 436. 368.   0.]
  [  2. 379. 316.   0.]
  ...
  [ 18. 550. 215.   0.]
  [ 19. 538. 261.   0.]
  [ 20. 527. 297.   0.]]

 [[  0. 505. 387.   0.]
  [  1. 438. 366.   0.]
  [  2. 379. 316.   0.]
  ...
  [ 18. 548. 214.   0.]
  [ 19. 536. 259.   0.]
  [ 20. 527. 296.   0.]]

 [[  0. 502. 388.   0.]
  [  1. 435. 365.   0.]
  [  2. 378. 314.   0.]
  ...
  [ 18. 547. 214.   0.]
  [ 19. 535. 258.   0.]
  [ 20. 526. 294.   0.]]

 ...

 [[  0. 205. 346.   5.]
  [  1. 258. 328.   5.]
  [  2. 295. 293.   5.]
  ...
  [ 18. 141. 205.   5.]
  [ 19. 127. 183.   5.]
  [ 20. 115. 161.   5.]]

 [[  0. 204. 347.   5.]
  [  1. 257. 329.   5.]
  [  2. 294. 292.   5.]
  ...
  [ 18. 139. 206.   5.]
  [ 19. 125. 183.   5.]
  [ 20. 113. 161.   5.]]

 [[  0. 204. 347.   5.]
  [  1. 257. 329.   5.]
  [  2. 295. 293.   5.]
  ...
  [ 18. 140. 206.   5.]
  [ 19. 126. 183.   5.]
  [ 20. 113. 161.   5.]]]


In [3]:
X, y = data[..., :-1], data[..., -1]
# inspect distribution of data, if unbalanced then balance
for fingers_count in range(6):
    ic(y[y == fingers_count].shape[0])

ic| y[y == fingers_count].shape[0]: 50211
ic| y[y == fingers_count].shape[0]: 53088
ic| y[y == fingers_count].shape[0]: 53004
ic| y[y == fingers_count].shape[0]: 52941
ic| y[y == fingers_count].shape[0]: 52794
ic| y[y == fingers_count].shape[0]: 52983


In [4]:
# Balance data
y_shapes = []
temp_y = y[:, 0]
unbal_indices = []
for fingers_count in torch.arange(np.unique(temp_y).shape[0]):
    unbal_indices.append(np.where(temp_y == fingers_count)[0])
    shape_ = unbal_indices[fingers_count].shape[0]
    y_shapes.append(shape_)

ic(y_shapes)
bal_indices = []
min_shape = min(y_shapes)
for fingers_count in range(len(unbal_indices)):
    bal_indices.append(unbal_indices[fingers_count][0:min_shape])

# ic(bal_indices)
bal_indices = np.array(bal_indices).flatten()
ic(bal_indices.shape)
y_new = y[bal_indices]
X_new = X[bal_indices]
ic(X_new.shape)
ic(y_new.shape)

ic| y_shapes: [2391, 2528, 2524, 2521, 2514, 2523]
ic| bal_indices.shape: (14346,)
ic| X_new.shape: torch.Size([14346, 21, 3])
ic| y_new.shape: torch.Size([14346, 21])


torch.Size([14346, 21])

In [5]:
X = X_new
y = y_new

for fingers_count in range(6):
    ic(y[y == fingers_count].shape[0])

ic| y[y == fingers_count].shape[0]: 50211
ic| y[y == fingers_count].shape[0]: 50211
ic| y[y == fingers_count].shape[0]: 50211
ic| y[y == fingers_count].shape[0]: 50211
ic| y[y == fingers_count].shape[0]: 50211
ic| y[y == fingers_count].shape[0]: 50211


In [6]:
# Normalize each hand relative to itself. Removes dependency of hand positioning in camera field of view
for i, hand in enumerate(X):
    X[i] = normalize_hand(hand)

In [7]:
class HandDataset(Dataset):
    def __init__(self, data):
        self.x, self.y = data[..., :-1], data[..., -1]
        self.n_samples = y.shape[0]

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        ic(self.x.shape, self.y.shape)
        reshaped_y = self.y.reshape(-1, 21, 1)
        ic(reshaped_y.shape)
        return torch.cat([self.x[index], reshaped_y[index]], dim=-1)

    def split(self, test_size=.2):
        split = int(self.x.shape[0] * (1 - test_size))
        return self[:split, :], self[split:, :]  # train, test


# Combine X and y to preserve data during shuffle
reshaped_y = y.reshape(-1, 21, 1)
data = torch.cat([X, reshaped_y], dim=-1)

# Shuffle
dataset = HandDataset(data[torch.randperm(data.shape[0])])  # shuffles train_dataset
train_dataset, test_dataset = dataset.split()

# hyper parameters
### DEFINED IN helpers.py

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
# for i in range(len(train_loader)):
#     print(train_loader.dataset[i, 0, 3].item())
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

ic| self.x.shape: torch.Size([14346, 21, 3])
    self.y.shape: torch.Size([14346, 21])
ic| reshaped_y.shape: torch.Size([14346, 21, 1])
ic| self.x.shape: torch.Size([14346, 21, 3])
    self.y.shape: torch.Size([14346, 21])
ic| reshaped_y.shape: torch.Size([14346, 21, 1])


In [8]:
ic(len(train_dataset))
ic(len(train_loader))
ic(len(test_dataset))
ic(len(test_loader))

ic| len(train_dataset): 11476
ic| len(train_loader): 115
ic| len(test_dataset): 2870
ic| len(test_loader): 29


29

In [9]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.LeakyReLU()
        self.linear2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        return out


model = NeuralNet(ic(input_size), ic(hidden_size), ic(num_classes)).to(device)
# loss and optimization
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def normalize_hands(hands):
    for i, hand in enumerate(hands):
        hands[i] = torch.from_numpy(normalize_hand(hand.np()).astype(np.float32()))
    return hands


# training loop
n_total_steps = len(train_loader)
for epoch in tqdm(range(num_epochs)):
    for i, sample in enumerate(train_loader):
        # extract data from sample
        hands = sample[..., :-1].float().to(device)
        labels = sample[..., -1][:, 0].long().to(device)

        # hands = normalize_hands(hands).float()
        hands = hands.reshape(-1, input_size)

        # forward pass
        outputs = model(hands).float()
        loss = criterion(outputs, labels)

        # backward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # if epoch % 5 == 0:
    #     print(f'epoch {epoch + 1}/{num_epochs}, loss = {loss.item():.4f}')

print('Training Complete!')


ic| input_size: 63
ic| hidden_size: 50
ic| num_classes: 6
  6%|▌         | 3/50 [00:00<00:04, 10.12it/s]

epoch 1/50, loss = 0.5741


 14%|█▍        | 7/50 [00:00<00:04, 10.57it/s]

epoch 6/50, loss = 0.1288


 26%|██▌       | 13/50 [00:01<00:03, 10.62it/s]

epoch 11/50, loss = 0.0349


 34%|███▍      | 17/50 [00:01<00:03, 10.72it/s]

epoch 16/50, loss = 0.0200


 46%|████▌     | 23/50 [00:02<00:02, 10.87it/s]

epoch 21/50, loss = 0.0074


 54%|█████▍    | 27/50 [00:02<00:02, 10.75it/s]

epoch 26/50, loss = 0.0083


 66%|██████▌   | 33/50 [00:03<00:01, 11.03it/s]

epoch 31/50, loss = 0.0091


 74%|███████▍  | 37/50 [00:03<00:01, 11.12it/s]

epoch 36/50, loss = 0.0117


 86%|████████▌ | 43/50 [00:03<00:00, 11.01it/s]

epoch 41/50, loss = 0.0078


 94%|█████████▍| 47/50 [00:04<00:00, 10.92it/s]

epoch 46/50, loss = 0.0077


100%|██████████| 50/50 [00:04<00:00, 10.78it/s]


In [10]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    i = 1
    for sample in test_loader:
        hands = sample[..., :-1].float().to(device)
        labels = sample[..., -1][:, 0].long().to(device)

        # hands = normalize_hands(hands).float()
        hands = hands.reshape(-1, input_size)

        outputs = model(hands)

        # value, index
        _, pred = torch.max(outputs, 1)

        # ic(i, labels.shape[0], (pred == labels).sum().item())
        i += 1
        n_samples += labels.shape[0]
        n_correct += (pred == labels).sum().item()

    acc = 100 * n_correct / n_samples

    print('accuracy =', acc)

ic| i: 1, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 2, labels.shape[0]: 100, (pred == labels).sum().item(): 98
ic| i: 3, labels.shape[0]: 100, (pred == labels).sum().item(): 99
ic| i: 4, labels.shape[0]: 100, (pred == labels).sum().item(): 99
ic| i: 5, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 6, labels.shape[0]: 100, (pred == labels).sum().item(): 99
ic| i: 7, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 8, labels.shape[0]: 100, (pred == labels).sum().item(): 98
ic| i: 9, labels.shape[0]: 100, (pred == labels).sum().item(): 98
ic| i: 10, labels.shape[0]: 100, (pred == labels).sum().item(): 98
ic| i: 11, labels.shape[0]: 100, (pred == labels).sum().item(): 99
ic| i: 12, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 13, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 14, labels.shape[0]: 100, (pred == labels).sum().item(): 100
ic| i: 15, labels.shape[0]: 100, (pred == labels).sum().item(): 9

accuracy = 99.30313588850174


In [11]:
save_model(model.state_dict(), 'finalized_model.pth')