<a href="https://colab.research.google.com/github/vlamen/tue-deeplearning/blob/main/assignments/assignment1/workbook-task1.2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

TODO: Modify this cell to add your group name, group number and your names and student IDs

Group: 99

Authors:

In [2]:
import requests
import io
import pandas as pd
import xarray as xa

%pylab inline

Populating the interactive namespace from numpy and matplotlib


### Load training/testing dataset

In [3]:
def load_dataset_from_url(url):
    """
    Loads a dataset from surfdrive. 
    
    Input:
    url: Download link of dataset 
    
    Outputs:
    x: Input features in numpy array format
    y: Targets/labels in numpy array format
    """
    
    response = requests.get(url)
    response.raise_for_status()
    
    dataset = np.load(io.BytesIO(response.content)) 
    
    x, y = np.split(dataset, [9], axis=2)
    
    return x, y
    
    
# Downloading may take a while..
train_x, train_y = load_dataset_from_url('https://surfdrive.surf.nl/files/index.php/s/gVrTFgSJ1rWl1IN/download')
test_x, test_y = load_dataset_from_url('https://surfdrive.surf.nl/files/index.php/s/JR0WXbrzzTAmwEB/download')

train_y, test_y = train_y.squeeze(-1), test_y.squeeze(-1)
print(f"train_x shape: {train_x.shape}")
print(f"train_y shape: {train_y.shape}\n")

print(f"test_x shape: {test_x.shape}")
print(f"test_y shape: {test_y.shape}")

train_x shape: (10000, 1000, 9)
train_y shape: (10000, 1000)

test_x shape: (2000, 1000, 9)
test_y shape: (2000, 1000)


### Visualize some of the training samples

In [4]:
def to_df(x, y):
    """
    Converts training/testing input features and corresponding labels into
    a Pandas Dataframe format
    
    Inputs:
    x: Input features (train or test) in numpy array format
    y: Targets/labels (train or test) in numpy array format
    
    Output:
    dataset_df: Train or test data, structered as a table with column names
    """
    
    numpy_data = np.concatenate([x,y], axis=2)
    
    dataset_df = xa.DataArray(numpy_data, 
                                     dims = ['N', 'frame', 'sensor'],
                                     name='training_data')\
                                        .to_dataframe()\
                                        .unstack('sensor')['training_data']\
                                        .reset_index()

    column_names = ['tot_acc_x', 'tot_acc_y', 'tot_acc_z', 'body_acc_x', 'body_acc_y',
       'body_acc_z', 'body_gyro_x', 'body_gyro_y', 'body_gyro_z', 'activity']
    
    dataset_df = dataset_df.rename(columns = dict(zip(list(dataset_df.columns[2:]), 
                                                      column_names)))\
                                         .astype({'activity':int})

    return dataset_df



def plot_training_samples(N, dataset_df):
    """
    Plots samples in test/train dataset
    
    Inputs 
    N: Number of samples that will be visualised. 
    dataset_df: Train or test data, structered as a table with column names. 
                This tabular structured data can be obtained with `to_df` function.
    """
    
    f, axes = plt.subplots(N, 4, figsize=(30, N*7))
    axes = iter(axes)

    for pid, df_pid in list(dataset_df.groupby('N'))[:N]:

        ax_tot_acc, ax_body_acc, ax_body_gyro, ax_activity = tuple(next(axes))

        df_pid.plot(x = 'frame', y=['tot_acc_x','tot_acc_y', 'tot_acc_z'], title=f'sample={pid}', ax=ax_tot_acc)
        df_pid.plot(x = 'frame', y=['body_acc_x','body_acc_y', 'body_acc_z'], ax=ax_body_acc)
        df_pid.plot(x = 'frame', y=['body_gyro_x','body_gyro_y', 'body_gyro_z'], ax=ax_body_gyro)
        df_pid.plot(x = 'frame', y=['activity'], ax=ax_activity) 
        
    
    

training_data_df = to_df(train_x, train_y)

plot_training_samples(2, training_data_df);

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 3 dimension(s) and the array at index 1 has 2 dimension(s)

In [None]:
#TODO Implement the solution to task 2 of assignment 1

In [4]:
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch import nn
import numpy as np

In [5]:
class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, num_layers):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(in_channels=input_size, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )
        self.rnn = nn.LSTM(input_size=64, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.conv(x).permute(0, 2, 1)
        x, (_, _) = self.rnn(x)
        x = self.fc(x)
        return x.permute(0, 2, 1)

In [7]:
from tqdm import tqdm
import matplotlib.pyplot as plt

class Trainer():
    def __init__(self,
                 model: torch.nn.Module,
                 device: torch.device,
                 criterion: torch.nn.Module,
                 optimizer: torch.optim.Optimizer,
                 training_dataLoader: torch.utils.data.Dataset,
                 validation_dataLoader: torch.utils.data.Dataset ,
                 epochs: int
                 ):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.training_dataLoader = training_dataLoader
        self.validation_dataLoader = validation_dataLoader
        self.device = device
        self.epochs = epochs

    def run_trainer(self):
        train_losses_total = []
        val_losses_total = []
        for epoch in tqdm(range(self.epochs)):
            self.model.train()  # train mode
            train_losses=[]
            for batch in self.training_dataLoader:
                x, y = batch
                input, target = x.to(device=self.device, dtype=torch.float), y.to(self.device) # send to device (GPU or CPU)
                self.optimizer.zero_grad()  # zerograd the parameters
                out = self.model(input)  # one forward pass
                loss = self.criterion(out, target)  # calculate training loss

                loss_value = loss.item()
                train_losses.append(loss_value)

                loss.backward()  # one backward pass
                self.optimizer.step()  # update the parameters

            self.model.eval()  # evaluation mode
            val_losses = []  # accumulate the losses here

            for batch in self.validation_dataLoader:
                x, y = batch
                input, target = x.to(device=self.device, dtype=torch.float), y.to(device=self.device)  # send to device (GPU or CPU)
                with torch.no_grad():
                    out = self.model(input)   # one forward pass
                    loss = self.criterion(out, target) # calculate validation loss

                    loss_value = loss.item()
                    val_losses.append(loss_value)

            print('Epoch:', epoch)
            print('Training loss,', np.mean(train_losses))
            print('Validation loss,', np.mean(val_losses))
            train_losses_total.append(np.mean(train_losses))
            val_losses_total.append(np.mean(val_losses))
        plt.plot(range(1, self.epochs + 1), train_losses_total, 'g', label='Training loss')
        plt.plot(range(1, self.epochs + 1), val_losses_total, 'b', label='Validation loss')
        plt.title('Training and Validation loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig('loss.png')
        plt.show()

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 32
NUM_CLASSES = 6
NUM_FEATURES = 9
HIDDEN_SIZE = 128
NUM_RNN_LAYERS = 1

In [10]:
train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size=0.2)
train_data = TensorDataset(torch.tensor(train_x), torch.tensor(train_y, dtype=torch.long))
val_data = TensorDataset(torch.tensor(val_x), torch.tensor(val_y, dtype=torch.long))
test_data = TensorDataset(torch.tensor(test_x), torch.tensor(test_y, dtype=torch.long))

train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE)
val_dataloader = DataLoader(val_data, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE)

model = RNNClassifier(input_size=NUM_FEATURES, hidden_size=HIDDEN_SIZE, num_classes=NUM_CLASSES, num_layers=NUM_RNN_LAYERS).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

trainer = Trainer(model=model, device=device, criterion=criterion, optimizer=optimizer, training_dataLoader=train_dataloader, validation_dataLoader=val_dataloader, epochs=100)

In [None]:
trainer.run_trainer()

 19%|█▉        | 19/100 [04:29<19:25, 14.39s/it]

Epoch: 0
Training loss, 1.78517034471035
Validation loss, 1.7783592200279237
Epoch: 1
Training loss, 1.7748009389638901
Validation loss, 1.772412450313568
Epoch: 2
Training loss, 1.7702831268310546
Validation loss, 1.7694251823425293
Epoch: 3
Training loss, 1.7675303852558135
Validation loss, 1.7671383953094482
Epoch: 4
Training loss, 1.764882922768593
Validation loss, 1.7643097043037415
Epoch: 5
Training loss, 1.7612110966444015
Validation loss, 1.759897322654724
Epoch: 6
Training loss, 1.7551574289798737
Validation loss, 1.7522098350524902
Epoch: 7
Training loss, 1.7439997911453247
Validation loss, 1.7370522689819337
Epoch: 8
Training loss, 1.7197756427526474
Validation loss, 1.7011559534072875
Epoch: 9
Training loss, 1.6551569545269011
Validation loss, 1.597565631866455
Epoch: 10
Training loss, 1.490955427289009
Validation loss, 1.4025187230110168
Epoch: 11
Training loss, 1.3445367282629013
Validation loss, 1.3247045183181763
Epoch: 12
Training loss, 1.296502473950386
Validation los

In [10]:
def predict(test_dataloader, model, device):
    model.eval()
    outputs = []
    for batch in tqdm(test_dataloader):
        x, y = batch
        input, target = x.to(device=device, dtype=torch.float), y.to(device=device)  # send to device (GPU or CPU)
        with torch.no_grad():
            output = model(input)   # one forward pass
            outputs.append(torch.argmax(output, dim=1).cpu().detach().numpy())
    return np.vstack(outputs)

test_pred = predict(test_dataloader, model, device)

100%|██████████| 63/63 [00:01<00:00, 42.24it/s]


In [1]:
acc = np.sum(test_pred == test_y) / (test_y.shape[0] * test_y.shape[1])
print(acc)

NameError: name 'np' is not defined