In [1]:
#
# classifer notebook
#

# for Colab paths
# import sys
# sys.path.append('/content/')
#
#!nvidia-smi
#!nvidia-smi -q


import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Lambda, Compose
from LandmarkDataset import LandmarkDataset
from torch.utils.data.sampler import SubsetRandomSampler

torch.set_default_dtype(torch.float64)

#
# MODEL
#
# Should be 63 classes:
# Price B/S * 10                 -> 20
# QTY B/S * 10                   -> 20
# QTY B/S 10,20-100              -> 20
# Action Cancel, Market, Garbage ->  3
#                                -> 63
class NN(torch.nn.Module):
    
    def __init__(self, input_size, hidden_size, num_classes):
        super(NN, self).__init__()
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_size, num_classes)
        #self.fc2 = torch.nn.Linear(hidden_size, 48)
        #self.fc3 = torch.nn.Linear(48, num_classes)
        
        #44 with dropout .025
        #28 with .01
        self.dropout = torch.nn.Dropout(p=0.01)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        #out = self.dropout(out)
        
        out = self.fc2(out)
        #out = self.relu(out)
        #out = self.dropout(out)
        
        #out = self.fc3(out)
        
        return out

    
#
# PARAMS
#

batch_size = 12
hidden_size = 96
learning_rate = .01
num_epochs = 250


#
# INITIAL DATA
#

# for directory load each file
# generate mapping of file -> class -> idx
# if available copy data to gpu (model set below)
# typically need model + tensors (label and value) moved over. 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transformations = Compose([
    Lambda(lambda x: torch.tensor(x.values).to(device))
])
target_transformations = Compose([
    Lambda(lambda x: torch.tensor(x).to(device))
])

dataset = LandmarkDataset("/home/jovyan/train/data",
                          "/home/jovyan/model",
                          transform=transformations)

num_classes = dataset.num_class
input_size = dataset.input_size() #2 * (21 * 3) + 12 + 1 + 10 #149

training_indices, validation_indices = dataset.train_validation_indices(split_p = .2)
        
train_sampler = SubsetRandomSampler(training_indices)
valid_sampler = SubsetRandomSampler(validation_indices)
    
train_dataloader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
valid_dataloader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler)


#
# INIT MODELS, LOSS FN, GRAD
#

model = NN(input_size, hidden_size, num_classes).to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

#
# TRAIN
#

running_loss_epoch = 0
for epoch in range(num_epochs):
    for batch_idx, (labels, landmarks) in enumerate(train_dataloader):  

        # zero out accumulated gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model(landmarks)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        # NB: len(dataloader) is num of batches
        running_loss_epoch += loss.item()
        
        if ((epoch+1) % 10 == 0) and ((batch_idx+1) % len(train_dataloader) == 0):
            print ('Epoch [{}/{}], Loss per (random) batch: {:.4f}, Running Loss (per N epoch): {:.4f}' 
                   .format(epoch+1, num_epochs, loss.item(), running_loss_epoch))
            running_loss_epoch = 0


  from .autonotebook import tqdm as notebook_tqdm


Epoch [10/250], Loss per (random) batch: 1.1617, Running Loss (per N epoch): 15310.5742
Epoch [20/250], Loss per (random) batch: 0.4129, Running Loss (per N epoch): 11336.3192
Epoch [30/250], Loss per (random) batch: 0.2246, Running Loss (per N epoch): 10577.6996
Epoch [40/250], Loss per (random) batch: 0.9043, Running Loss (per N epoch): 10061.8993
Epoch [50/250], Loss per (random) batch: 1.2999, Running Loss (per N epoch): 9685.4794
Epoch [60/250], Loss per (random) batch: 2.0129, Running Loss (per N epoch): 9515.7403
Epoch [70/250], Loss per (random) batch: 3.1914, Running Loss (per N epoch): 9253.8470
Epoch [80/250], Loss per (random) batch: 4.2592, Running Loss (per N epoch): 9164.3803
Epoch [90/250], Loss per (random) batch: 0.5148, Running Loss (per N epoch): 9064.7203
Epoch [100/250], Loss per (random) batch: 0.4376, Running Loss (per N epoch): 8948.9968
Epoch [110/250], Loss per (random) batch: 0.0215, Running Loss (per N epoch): 8915.5206
Epoch [120/250], Loss per (random) ba

In [2]:
#
# EXPORT
#
# NB: LandmarkDataset.py updates meta.json with class index
#
dummy_input = torch.zeros(input_size)
#model.load_state_dict(torch.load('./model_overfit.pt'))
torch.onnx.export(model, dummy_input, 'onnx_model.onnx', export_params=True,
                  input_names = ['landmarks'], output_names = ['class'], verbose=True)

#import onnx
#onnx_model = onnx.load("./onnx_model.onnx")
#onnx.checker.check_model(onnx_model)


Exported graph: graph(%landmarks : Double(150, strides=[1], requires_grad=0, device=cpu),
      %fc1.bias : Double(96, strides=[1], requires_grad=1, device=cpu),
      %fc2.bias : Double(63, strides=[1], requires_grad=1, device=cpu),
      %onnx::MatMul_12 : Double(150, 96, strides=[1, 150], requires_grad=0, device=cpu),
      %onnx::MatMul_13 : Double(96, 63, strides=[1, 96], requires_grad=0, device=cpu)):
  %onnx::Add_6 : Double(96, strides=[1], device=cpu) = onnx::MatMul[onnx_name="MatMul_0"](%landmarks, %onnx::MatMul_12) # /opt/conda/lib/python3.10/site-packages/torch/nn/modules/linear.py:114:0
  %input : Double(96, strides=[1], requires_grad=1, device=cpu) = onnx::Add[onnx_name="Add_1"](%fc1.bias, %onnx::Add_6) # /opt/conda/lib/python3.10/site-packages/torch/nn/modules/linear.py:114:0
  %onnx::MatMul_8 : Double(96, strides=[1], requires_grad=1, device=cpu) = onnx::Relu[onnx_name="Relu_2"](%input) # /opt/conda/lib/python3.10/site-packages/torch/nn/functional.py:1453:0
  %onnx::Add_

In [3]:
#
# EVAL
#
torch.set_printoptions(precision=4, sci_mode=False)
softmax = torch.nn.Softmax(dim=1)
accuracy = 0
count = 0
with torch.no_grad():
    
    matches = {}
    counts = {}
    
    for batch_idx, (labels, landmarks) in enumerate(valid_dataloader):  
        out = model(landmarks)
        prob = softmax(out.data)     #setup for threshold or 'garbage' class
        _, klass = torch.max(out.data, 1)

        #print(klass, labels, klass==labels)
        #print(prob)
        #print("-----")

        # x is class
        for i, x in enumerate(klass.tolist()):
            label = int(labels[i])
            if x == labels[i]:
                matches[x] = matches.get(x, 0) + 1
            else:
                # track label-side class as well
                matches[x] = matches.get(x, 0)
                matches[ label ] = matches.get(label, 0)
                                
            counts[label] = counts.get(label, 0) + 1
            counts[x] = counts.get(x, 0) + 1
            
        # aggregate accuracy
        accuracy += (klass == labels).sum().item()
        count += len(labels)
        

print('--------')
print("Accuracy {}/{} : {:.4f}".format(accuracy, count, accuracy/count))
print("------------")

# class vs percentage label match - track whether some gestures have bad data
acc = sorted([(int(k), v / counts.get(k, 1)) for k,v in matches.items()], key=lambda x:x[1])
for klass, match in acc:
    print(klass, " {:.4f}".format(match))


--------
Accuracy 2055/2880 : 0.7135
------------
2  0.1860
26  0.1954
46  0.2239
4  0.2250
24  0.2330
6  0.2340
8  0.2344
14  0.2353
27  0.2887
19  0.2903
47  0.2923
23  0.3023
51  0.3069
45  0.3088
29  0.3188
36  0.3191
0  0.3261
18  0.3273
13  0.3333
3  0.3333
5  0.3333
28  0.3385
40  0.3387
22  0.3396
38  0.3453
10  0.3455
15  0.3462
25  0.3488
54  0.3492
53  0.3509
34  0.3553
41  0.3575
44  0.3600
7  0.3636
49  0.3636
61  0.3636
55  0.3656
33  0.3678
1  0.3704
56  0.3704
21  0.3784
42  0.3816
39  0.3846
43  0.3853
57  0.3874
9  0.3882
62  0.3929
12  0.3956
52  0.3976
30  0.3983
32  0.4068
60  0.4204
16  0.4225
20  0.4231
35  0.4242
17  0.4321
37  0.4384
48  0.4444
58  0.4571
11  0.4659
31  0.4742
50  0.4900
59  0.5000


In [4]:
print("Done")

Done
