<a href="https://colab.research.google.com/github/vlamen/tue-deeplearning/blob/main/assignments/assignment1/workbook-task1.2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

TODO: Modify this cell to add your group name, group number and your names and student IDs

Group: 99

Authors: Yang Yang, Ying Lu

In [9]:
import requests
import io
import pandas as pd
import xarray as xa

%pylab inline

Populating the interactive namespace from numpy and matplotlib


### Load training/testing dataset

In [None]:
def load_dataset_from_url(url, timeout=60, n_features=9):
    """
    Loads a dataset from surfdrive and splits it into features and targets.

    Input:
    url: Download link of dataset
    timeout: Seconds `requests` waits for the server (to connect, and between
             received bytes) before giving up. Default: 60.
    n_features: Number of leading entries along axis 2 that are input
                features; the remaining entries are returned as targets.
                Default: 9.

    Outputs:
    x: Input features in numpy array format
    y: Targets/labels in numpy array format

    Raises:
    requests.HTTPError: if the server answers with an error status code.
    requests.Timeout: if the server does not respond within `timeout` seconds.
    """

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # np.load expects a file-like object, so wrap the downloaded bytes.
    dataset = np.load(io.BytesIO(response.content))

    # Everything before index `n_features` on axis 2 is a feature, the rest is
    # the target (the array is therefore expected to be 3-dimensional).
    x, y = np.split(dataset, [n_features], axis=2)

    return x, y
    
    
# Fetch both splits from surfdrive (this can take a while on a slow connection).
train_x, train_y = load_dataset_from_url(
    'https://surfdrive.surf.nl/files/index.php/s/gVrTFgSJ1rWl1IN/download'
)
test_x, test_y = load_dataset_from_url(
    'https://surfdrive.surf.nl/files/index.php/s/JR0WXbrzzTAmwEB/download'
)

# Report the array shapes as a quick sanity check of the download.
print(
    f"train_x shape: {train_x.shape}\n"
    f"train_y shape: {train_y.shape}\n\n"
    f"test_x shape: {test_x.shape}\n"
    f"test_y shape: {test_y.shape}"
)

### Visualize some of the training samples

In [None]:
def to_df(x, y):
    """
    Turns feature and label arrays into a single Pandas DataFrame.

    Inputs:
    x: Input features (train or test) in numpy array format
    y: Targets/labels (train or test) in numpy array format

    Output:
    dataset_df: Train or test data, structured as a table with the sample
                index `N`, the `frame` index, one named column per sensor
                channel and an integer `activity` column.
    """

    sensor_columns = ['tot_acc_x', 'tot_acc_y', 'tot_acc_z', 'body_acc_x', 'body_acc_y',
       'body_acc_z', 'body_gyro_x', 'body_gyro_y', 'body_gyro_z', 'activity']

    # Glue the labels behind the features along axis 2.
    stacked = np.concatenate([x, y], axis=2)

    # Label the three axes so xarray can flatten the cube into a table.
    labelled = xa.DataArray(stacked,
                            dims=['N', 'frame', 'sensor'],
                            name='training_data')
    long_df = labelled.to_dataframe()

    # Spread the 'sensor' level into columns and bring N / frame back as columns.
    wide_df = long_df.unstack('sensor')['training_data'].reset_index()

    # The first two columns are N and frame; the rest get readable names.
    rename_map = dict(zip(list(wide_df.columns[2:]), sensor_columns))
    dataset_df = wide_df.rename(columns=rename_map).astype({'activity': int})

    return dataset_df



def plot_training_samples(N, dataset_df):
    """
    Plots samples in test/train dataset

    Each sample gets one row of four panels: total acceleration, body
    acceleration, body gyroscope and the activity label, all against `frame`.

    Inputs
    N: Number of samples that will be visualised (the first N groups of the
       `N` column are used; fewer are drawn if the table holds fewer samples).
    dataset_df: Train or test data, structured as a table with column names.
                This tabular structured data can be obtained with `to_df` function.
    """

    # squeeze=False keeps `axes` two-dimensional (N rows x 4 columns) even when
    # N == 1; by default a single row collapses to a 1-D array and unpacking a
    # row into four axes fails.
    f, axes = plt.subplots(N, 4, figsize=(30, N*7), squeeze=False)

    # zip stops once the N rows of axes are used up, so only the first N
    # groups are ever pulled from the groupby iterator.
    for row_axes, (pid, df_pid) in zip(axes, dataset_df.groupby('N')):

        ax_tot_acc, ax_body_acc, ax_body_gyro, ax_activity = row_axes

        df_pid.plot(x = 'frame', y=['tot_acc_x','tot_acc_y', 'tot_acc_z'], title=f'sample={pid}', ax=ax_tot_acc)
        df_pid.plot(x = 'frame', y=['body_acc_x','body_acc_y', 'body_acc_z'], ax=ax_body_acc)
        df_pid.plot(x = 'frame', y=['body_gyro_x','body_gyro_y', 'body_gyro_z'], ax=ax_body_gyro)
        df_pid.plot(x = 'frame', y=['activity'], ax=ax_activity)
        
    
    

# Tabulate the training arrays, then draw the first two samples.
training_data_df = to_df(x=train_x, y=train_y)

plot_training_samples(N=2, dataset_df=training_data_df);

In [6]:
#TODO Implement the solution to task 2 of assignment 1

import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F



In [None]:
from tqdm import tqdm

class Trainer():
    """
    Minimal epoch-based training loop with a validation pass after each epoch.

    Parameters
    ----------
    model: network to optimise (expected to already live on `device`).
    device: device every batch is moved to before the forward pass.
    criterion: loss module called as `criterion(output, target)`.
    optimizer: optimizer that updates the parameters of `model`.
    training_dataLoader: iterable yielding `(x, y)` tensor batches for training.
    validation_dataLoader: iterable yielding `(x, y)` tensor batches for validation.
    epochs: number of passes over the training data.
    """
    def __init__(self,
                 model: torch.nn.Module,
                 device: torch.device,
                 criterion: torch.nn.Module,
                 optimizer: torch.optim.Optimizer,
                 training_dataLoader: torch.utils.data.DataLoader,
                 validation_dataLoader: torch.utils.data.DataLoader,
                 epochs: int
                 ):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.training_dataLoader = training_dataLoader
        self.validation_dataLoader = validation_dataLoader
        self.device = device
        self.epochs = epochs

    def _to_device(self, x, y):
        """
        Move one batch to `self.device` using the same dtype rules for
        training and validation.

        Inputs are always cast to float32. Floating-point targets (one-hot
        rows, probabilities, regression values) are cast to float32 as well;
        every other target dtype is treated as class indices and cast to long.
        """
        inputs = x.to(device=self.device, dtype=torch.float)
        if torch.is_floating_point(y):
            target = y.to(device=self.device, dtype=torch.float)
        else:
            target = y.to(device=self.device, dtype=torch.long)
        return inputs, target

    @staticmethod
    def _mean(values):
        """Mean of a list of losses; NaN (without a numpy warning) when the list is empty."""
        return float(np.mean(values)) if values else float('nan')

    def run_trainer(self):
        """
        Train for `self.epochs` epochs and print the mean training and
        validation loss after each one.

        Returns
        -------
        dict with keys 'train_loss' and 'val_loss', each a list holding one
        mean loss per epoch.
        """
        history = {'train_loss': [], 'val_loss': []}

        for epoch in tqdm(range(self.epochs)):
            self.model.train()  # train mode
            train_losses = []
            for x, y in self.training_dataLoader:
                inputs, target = self._to_device(x, y)  # send to device (GPU or CPU)
                self.optimizer.zero_grad()  # reset gradients of the previous step
                out = self.model(inputs)  # one forward pass
                loss = self.criterion(out, target)  # calculate loss
                loss.backward()  # one backward pass
                self.optimizer.step()  # update the parameters
                train_losses.append(loss.item())

            self.model.eval()  # evaluation mode
            val_losses = []  # accumulate the losses here
            with torch.no_grad():  # no gradients are needed for validation
                for x, y in self.validation_dataLoader:
                    inputs, target = self._to_device(x, y)
                    out = self.model(inputs)  # one forward pass
                    val_losses.append(self.criterion(out, target).item())

            train_loss = self._mean(train_losses)
            val_loss = self._mean(val_losses)
            history['train_loss'].append(train_loss)
            history['val_loss'].append(val_loss)

            print('Epoch:', epoch)
            print('Training loss,', train_loss)
            print('Validation loss,', val_loss)

        return history

In [None]:
# CNN 

class CNNClassifier(nn.Module):
    """
    1-D convolutional classifier for fixed-length multi-channel windows.

    Architecture: Conv1d -> BatchNorm -> ReLU -> MaxPool -> Conv1d -> ReLU ->
    MaxPool -> Flatten -> Linear(512) -> ReLU -> Dropout -> Linear(num_classes).
    The network returns raw logits (no softmax).

    Parameters
    ----------
    in_channels: number of input channels per time step (default 9).
    num_classes: number of output logits (default 6).
    input_length: number of time steps in every input window (default 128).
                  The size of the first linear layer is derived from it, so
                  inputs must have exactly this length.

    Raises
    ------
    ValueError: if `input_length` is too short to survive the convolution and
                pooling stages.
    """
    def __init__(self, in_channels=9, num_classes=6, input_length=128):
        super().__init__()

        # (kernel_size, stride, padding) of every length-changing layer below,
        # in order, used to work out how many time steps reach nn.Flatten.
        stages = [(10, 1, 1), (4, 2, 0), (10, 1, 1), (4, 2, 0)]
        length = input_length
        for kernel_size, stride, padding in stages:
            length = (length + 2 * padding - kernel_size) // stride + 1
            if length < 1:
                raise ValueError(
                    f'input_length={input_length} is too short for this architecture')

        self.layers = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=192, kernel_size=10, padding=1),
            nn.BatchNorm1d(192),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=4, stride=2),
            nn.Conv1d(in_channels=192, out_channels=96, kernel_size=10, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=4, stride=2),
            nn.Flatten(),
            nn.Linear(96 * length, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes)
            # No softmax layer: the output is left as logits.
        )

    def forward(self, x):
        """Map a `(batch, in_channels, input_length)` tensor to `(batch, num_classes)` logits."""
        return self.layers(x)

# TODO: 1Dconv + LSTM

# TODO: LSTM

In [None]:
# Define a custom dataset
from torch.utils.data import Dataset
class CustomAccelerometerDataset(Dataset):
    """Pairs every input sample with its target so both can be indexed together."""

    def __init__(self, inputs, targets):
        """Keep references to the two parallel sequences; nothing is copied or checked."""
        self.targets = targets
        self.inputs = inputs

    def __len__(self):
        """Total number of samples, taken from the inputs sequence."""
        sample_count = len(self.inputs)
        return sample_count

    def __getitem__(self, index):
        """Return the `(input, target)` pair stored at position `index`."""
        sample = self.inputs[index]
        label = self.targets[index]
        return sample, label

In [None]:
# split data into training set and validation set
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader


def _one_hot(labels, num_classes=None):
    """
    One-hot encode integer class labels as float32 rows.

    A trailing axis of length 1 is dropped first, so labels of shape (n,) and
    (n, 1) both give an (n, num_classes) result. When `num_classes` is None it
    is taken to be the largest label + 1.
    """
    labels = np.asarray(labels, dtype=int)
    if labels.ndim > 1 and labels.shape[-1] == 1:
        labels = labels[..., 0]
    if labels.size == 0:
        raise ValueError('cannot one-hot encode an empty label array')
    if labels.min() < 0:
        raise ValueError('class labels must be non-negative integers')
    if num_classes is None:
        num_classes = int(labels.max()) + 1
    # Row k of the identity matrix is the one-hot vector of class k.
    return np.eye(num_classes, dtype=np.float32)[labels]


def split(inputs, targets, train_size=0.8, loader_batch_size=2, random_seed=42,
          num_classes=None):
    """
    Split samples into a training and a validation DataLoader.

    Inputs:
    inputs: sequence/array of input samples (first axis = sample).
    targets: integer class label per sample, same length as `inputs`.
    train_size: fraction of the samples used for training (default 0.8,
                i.e. an 80:20 split).
    loader_batch_size: batch size of both data loaders (default 2).
    random_seed: seed of the shuffled split, for reproducibility (default 42).
    num_classes: width of the one-hot targets; inferred as max label + 1 when
                 None. NOTE(review): this must equal the number of outputs of
                 the network being trained - confirm the label range of the
                 dataset.

    Outputs:
    dataloader_training, dataloader_validation: loaders yielding
        `(input, one_hot_target)` batches.

    Raises:
    ValueError: if `inputs` and `targets` differ in length, or the labels are
                empty or negative.
    """
    inputs = np.asarray(inputs)
    targets = _one_hot(targets, num_classes=num_classes)

    if len(inputs) != len(targets):
        raise ValueError(
            f'inputs and targets differ in length: {len(inputs)} != {len(targets)}')

    # Splitting both arrays in one call guarantees that every input keeps its
    # own target after shuffling.
    inputs_train, inputs_valid, targets_train, targets_valid = train_test_split(
        inputs,
        targets,
        random_state=random_seed,
        train_size=train_size,
        shuffle=True)

    # dataset training
    dataset_train = CustomAccelerometerDataset(inputs=inputs_train,
                                               targets=targets_train)

    # dataset validation
    dataset_valid = CustomAccelerometerDataset(inputs=inputs_valid,
                                               targets=targets_valid)

    # dataloader training
    dataloader_training = DataLoader(dataset=dataset_train,
                                     batch_size=loader_batch_size,
                                     shuffle=True)

    # dataloader validation (order is irrelevant for evaluation, so no shuffling)
    dataloader_validation = DataLoader(dataset=dataset_valid,
                                       batch_size=loader_batch_size,
                                       shuffle=False)

    return dataloader_training, dataloader_validation

In [None]:
def train(X, y, epoch_num=20, learning_rate=0.001, momentum_val=0.9):
    """
    Train a `CNNClassifier` on the given samples and return the trained model.

    Inputs:
    X: input samples, forwarded to `split`.
    y: class label per sample, forwarded to `split`.
    epoch_num: number of training epochs (default 20).
    learning_rate: learning rate of the SGD optimizer (default 0.001).
    momentum_val: momentum of the SGD optimizer (default 0.9).

    Output:
    model: the trained network, on the GPU when one is available and on the
           CPU otherwise.
    """
    # split training data and validation data
    dataloader_training, dataloader_validation = split(X, y)

    # device: prefer the GPU, fall back to the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # model
    model = CNNClassifier().to(device)

    # criterion
    criterion = torch.nn.CrossEntropyLoss()

    # optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum_val)

    # trainer
    trainer = Trainer(model=model,
                      device=device,
                      criterion=criterion,
                      optimizer=optimizer,
                      training_dataLoader=dataloader_training,
                      validation_dataLoader=dataloader_validation,
                      epochs=epoch_num)

    # start training
    trainer.run_trainer()
    return model

In [None]:
def predict(x, model, verbose=False):
    """
    Predict the class index of every sample in a batch.

    Inputs:
    x: batch of inputs as a tensor or anything `torch.as_tensor` accepts
       (e.g. a numpy array); the first axis is the batch axis.
    model: trained network returning one row of class scores per sample.
    verbose: when True, print the predicted indices (default False, so calling
             this in a loop does not flood the output).

    Output:
    status: 1-D tensor with the index of the highest score per sample, on the
            same device as the model.
    """
    # TODO: to be modified according to network structure

    # Run on whatever device/dtype the model's parameters use; fall back to
    # float32 on the CPU for a parameter-free model.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device, dtype = first_param.device, first_param.dtype
    else:
        device, dtype = torch.device('cpu'), torch.float

    inputs = torch.as_tensor(x, dtype=dtype).to(device)

    model.eval()
    with torch.no_grad():
        out = model(inputs)  # send through model/network

    # Softmax is monotonic, so taking argmax of the raw scores gives the same
    # class without the extra computation.
    status = torch.argmax(out, dim=1)

    if verbose:
        print('predict', status)
    return status
    
    

In [None]:

interval_size = 128
step = 3


def window_starts(length, size, stride, cover_tail=False):
    """
    Start indices of the sliding windows of `size` frames over `length` frames.

    Windows start at 0, stride, 2*stride, ... as long as they fit entirely.
    With `cover_tail=True` one extra window ending on the last frame is added
    when the regular windows leave trailing frames uncovered. Returns an empty
    list when the sequence is shorter than one window.
    """
    if length < size:
        return []
    starts = list(range(0, length - size + 1, stride))
    if cover_tail and starts[-1] != length - size:
        starts.append(length - size)
    return starts


# x = (n_sample, timestamp, channel), y = (n_sample, timestamp, 1)
new_train_x = []
new_train_y = []

# zip pairs every recording with its own label track.
for sample, label in zip(train_x, train_y):
    # np.bincount needs a flat array of non-negative integers.
    frame_labels = np.asarray(label).astype(int).ravel()
    for i in window_starts(len(sample), interval_size, step):
        # Conv1d expects (channel, time); the data is stored as (time, channel).
        interval_x = sample[i : i + interval_size].T
        # The window is labelled with its most frequent per-frame activity.
        interval_y = np.bincount(frame_labels[i : i + interval_size]).argmax()
        new_train_x.append(interval_x)
        new_train_y.append(interval_y)

# train
model = train(new_train_x, new_train_y)

# prediction on test data
predicted = []
for sample in test_x:
    starts = window_starts(len(sample), interval_size, step, cover_tail=True)
    if starts:
        # Classify all windows of this recording in one batch:
        # shape (n_windows, channel, interval_size).
        windows = np.stack([sample[i : i + interval_size].T for i in starts])
        window_labels = predict(torch.as_tensor(windows, dtype=torch.float), model)
        window_labels = window_labels.cpu().numpy()

        # Every window votes for its predicted class on each frame it covers.
        votes = np.zeros((len(sample), int(window_labels.max()) + 1), dtype=int)
        for i, interval_y in zip(starts, window_labels):
            votes[i : i + interval_size, interval_y] += 1
        frame_prediction = votes.argmax(axis=1)
    else:
        # Recording shorter than one window: nothing to classify, so the
        # frames fall back to class 0.
        frame_prediction = np.zeros(len(sample), dtype=int)

    # Keep the (timestamp, 1) layout of the entries of test_y.
    predict_y = frame_prediction[:, None]
    predicted.append(predict_y)


# get multiclassification statistics

from sklearn.metrics import precision_recall_fscore_support as score

# Flatten ground truth and predictions to one label per frame, in the same order.
y_test_array = np.concatenate([np.asarray(y_i).astype(int).ravel() for y_i in test_y])
predicted_array = np.concatenate([np.asarray(y_i).ravel() for y_i in predicted])

precision, recall, fscore, support = score(y_test_array, predicted_array)

print('precision: {}'.format(precision))
print('recall: {}'.format(recall))
print('fscore: {}'.format(fscore))
print('support: {}'.format(support))


        
