In [1]:
import torch
import random
import numpy as np
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import pandas as pd

random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic = True

In [36]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
LABELS = [    
    "JUMPING",
    "JUMPING_JACKS",
    "BOXING",
    "WAVING_2HANDS",
    "WAVING_1HAND",
    "CLAPPING_HANDS"

] 
DATASET_PATH = "poses_dataset/"

X_train_path = DATASET_PATH + "X_train.txt"
X_test_path = DATASET_PATH + "X_test.txt"

y_train_path = DATASET_PATH + "Y_train.txt"
y_test_path = DATASET_PATH + "Y_test.txt"

# Hyper-parameters 
# input_size = 784 # 28x28
num_classes = 7
num_epochs = 100
batch_size = 100
learning_rate = 0.001

input_size = 36
sequence_length = 32
hidden_size = 128
num_layers = 2

In [3]:
device

device(type='cuda')

In [26]:
column_names = [  "j0_x",  "j0_y", "j1_x", "j1_y" , "j2_x", "j2_y", "j3_x", "j3_y", "j4_x", "j4_y", "j5_x", "j5_y", "j6_x", "j6_y", "j7_x", "j7_y", "j8_x", "j8_y", "j9_x", "j9_y", "j10_x", "j10_y", "j11_x", "j11_y", "j12_x", "j12_y", "j13_x", "j13_y", 'j14_x', "j14_y", "j15_x", "j15_y", "j16_x", "j16_y", "j17_x", "j17_y" ]
x_train_data = pd.read_csv(X_train_path, sep=",", names=column_names, header=None, dtype=np.float32)
x_test_data = pd.read_csv(X_test_path, sep=",", names=column_names, header=None, dtype=np.float32)
y_train_data = pd.read_csv(y_train_path, names=["labels"], dtype=np.int_)
y_test_data = pd.read_csv(y_test_path, names=["labels"], dtype=np.int_)

In [23]:
def normalize(data):
    for i, column in enumerate(data):
        if i % 2 == 0:
            data[column] = data[column] / 640
        else:
            data[column] = data[column] / 480
    return data

In [27]:
normalize(x_test_data)
normalize(x_train_data)

Unnamed: 0,j0_x,j0_y,j1_x,j1_y,j2_x,j2_y,j3_x,j3_y,j4_x,j4_y,...,j13_x,j13_y,j14_x,j14_y,j15_x,j15_y,j16_x,j16_y,j17_x,j17_y
0,0.462366,0.336623,0.480770,0.423777,0.439916,0.423683,0.429683,0.524087,0.417491,0.610944,...,0.513325,0.858171,0.460228,0.325875,0.476566,0.325871,0.000000,0.000000,0.497005,0.336733
1,0.462273,0.336667,0.480756,0.423767,0.439889,0.423719,0.429670,0.524113,0.417486,0.608252,...,0.513270,0.855479,0.458206,0.328513,0.474541,0.328554,0.000000,0.000000,0.496912,0.336779
2,0.458275,0.336708,0.480711,0.423771,0.437856,0.423713,0.429666,0.524148,0.417597,0.602860,...,0.513316,0.858221,0.452147,0.328513,0.468434,0.328571,0.000000,0.000000,0.494963,0.336831
3,0.456216,0.336788,0.480655,0.426412,0.435809,0.426373,0.429689,0.529404,0.421553,0.602840,...,0.513248,0.858223,0.450158,0.328598,0.466386,0.331219,0.000000,0.000000,0.494875,0.336869
4,0.450098,0.350250,0.478691,0.437156,0.431850,0.439808,0.429703,0.532210,0.429692,0.613710,...,0.515272,0.858092,0.448084,0.336658,0.464270,0.336669,0.000000,0.000000,0.492816,0.347583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723995,0.572323,0.249740,0.545817,0.331265,0.513305,0.334060,0.496944,0.439946,0.515239,0.507831,...,0.557984,0.743981,0.560053,0.241677,0.574331,0.236296,0.531556,0.249948,0.000000,0.000000
723996,0.574264,0.247185,0.545855,0.331294,0.513342,0.334083,0.496923,0.439842,0.509138,0.507875,...,0.556023,0.743962,0.560064,0.239012,0.574341,0.233590,0.531623,0.249950,0.000000,0.000000
723997,0.572278,0.247181,0.545858,0.331319,0.515255,0.334127,0.496880,0.442504,0.503084,0.521377,...,0.556000,0.744010,0.560003,0.239012,0.574309,0.233606,0.531602,0.249977,0.000000,0.000000
723998,0.572309,0.247146,0.545872,0.333883,0.515258,0.336619,0.496959,0.442683,0.496922,0.537717,...,0.556006,0.744062,0.560016,0.238944,0.574327,0.233575,0.531600,0.249969,0.000000,0.000000


In [28]:
x_test_data.head

<bound method NDFrame.head of             j0_x      j0_y      j1_x      j1_y      j2_x      j2_y      j3_x  \
0       0.480608  0.339533  0.499006  0.429050  0.458230  0.426417  0.445991   
1       0.480573  0.339540  0.499003  0.429056  0.458214  0.426448  0.445981   
2       0.478591  0.339481  0.498986  0.429035  0.456236  0.426415  0.445988   
3       0.478508  0.336883  0.498959  0.426423  0.456230  0.426333  0.445983   
4       0.476562  0.336775  0.497027  0.426348  0.454209  0.423727  0.445997   
...          ...       ...       ...       ...       ...       ...       ...   
184027  0.572295  0.247225  0.545781  0.331338  0.513191  0.334079  0.494963   
184028  0.572328  0.247229  0.545794  0.333888  0.513186  0.336617  0.494970   
184029  0.572286  0.247194  0.545811  0.333858  0.513228  0.334133  0.494969   
184030  0.572294  0.247144  0.545798  0.333860  0.513242  0.336677  0.494942   
184031  0.572277  0.247154  0.545809  0.331348  0.513237  0.336660  0.494948   

         

In [29]:
type(y_train_data.iloc[0,0])

numpy.int64

In [11]:
# Fully connected neural network with one hidden layer
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # -> x needs to be: (batch_size, seq, input_size)
        
        # or:
        #self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        #self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        # Set initial hidden states (and cell states for LSTM)
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        
        # x: (n, 28, 28), h0: (2, n, 128)
        
        # Forward propagate RNN
        # out, _ = self.rnn(x, h0)  
        # or:
        out, _ = self.lstm(x, (h0,c0))  
        
        # out: tensor of shape (batch_size, seq_length, hidden_size)
        # out: (n, 28, 128)
        
        # Decode the hidden state of the last time step
        out = out[:, -1, :]
        # out: (n, 128)
         
        out = self.fc(out)
        # out: (n, 10)
        return out

In [14]:
model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

In [15]:
def chunker(x_seq, y_seq, seq_size, batch_size):
    for batch_pos in range(0, len(y_seq), batch_size):
        x_batch = list()
        for pos in range(batch_size):
            # print(x_seq.iloc[pos*seq_size:pos*seq_size + seq_size])
            x_batch.append(x_seq.iloc[pos*seq_size:pos*seq_size + seq_size].values)
        
        yield torch.tensor(x_batch), torch.flatten(torch.tensor(y_seq.iloc[batch_pos:batch_pos + batch_size].values))
        

In [34]:
for i, (x_batch, y_batch) in enumerate(chunker(x_test_data, y_test_data, sequence_length, batch_size)):
    if i == 100:
        break
    print(x_batch.shape)

torch.Size([1000, 32, 36])
torch.Size([1000, 32, 36])
torch.Size([1000, 32, 36])
torch.Size([1000, 32, 36])
torch.Size([1000, 32, 36])
torch.Size([1000, 32, 36])


In [37]:
# Train the model
n_total_steps = y_train_data.shape[0] / 32
for epoch in range(num_epochs):
    for i, (x_batch, y_batch) in enumerate(chunker(x_train_data, y_train_data, sequence_length, batch_size)):  
        # origin shape: [N, 1, 28, 28]
        # resized: [N, 28, 28]
        y_batch = y_batch.to(device)
        x_batch = x_batch.to(device)
        
        
        # Forward pass
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
        # if (i+1) % 100 == 0:
        #     print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
            # break
        


Epoch [1/100], Step [1/707.03125], Loss: 2.4927
Epoch [1/100], Step [2/707.03125], Loss: 2.4772
Epoch [1/100], Step [3/707.03125], Loss: 2.4448
Epoch [1/100], Step [4/707.03125], Loss: 2.4001
Epoch [1/100], Step [5/707.03125], Loss: 2.3467
Epoch [1/100], Step [6/707.03125], Loss: 2.2876
Epoch [1/100], Step [7/707.03125], Loss: 2.2249
Epoch [1/100], Step [8/707.03125], Loss: 2.1599
Epoch [1/100], Step [9/707.03125], Loss: 2.0937
Epoch [1/100], Step [10/707.03125], Loss: 2.0270
Epoch [1/100], Step [11/707.03125], Loss: 1.9599
Epoch [1/100], Step [12/707.03125], Loss: 1.8928
Epoch [1/100], Step [13/707.03125], Loss: 1.8254
Epoch [1/100], Step [14/707.03125], Loss: 1.7577
Epoch [1/100], Step [15/707.03125], Loss: 1.6895
Epoch [1/100], Step [16/707.03125], Loss: 1.6204
Epoch [1/100], Step [17/707.03125], Loss: 1.5503
Epoch [1/100], Step [18/707.03125], Loss: 1.4792
Epoch [1/100], Step [19/707.03125], Loss: 1.4070
Epoch [1/100], Step [20/707.03125], Loss: 1.3343
Epoch [1/100], Step [21/707.0

ValueError: Expected input batch_size (100) to match target batch_size (25).

In [10]:
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

Accuracy of the network on the 10000 test images: 94.92 %
