In [None]:
import pandas as pd
from google.colab import drive
# Mount Google Drive (Colab only). The data paths used later are relative
# ('drive/MyDrive/...'), so this assumes the working directory is /content.
drive.mount('/content/drive')

Mounted at /content/drive


# Reading in Data

We have elected to divide the data into windows of length 120, with one y-measurement at the center of each window. This will enable us to use a convolutional neural network with many-to-one mapping.

### Setup

In [None]:
import numpy  as np
import pandas as pd
import os

# ---------------------------- Tunable settings ----------------------------
# Folders holding the raw training and test CSV files.
path_training = 'drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/TrainingData/'
path_test = 'drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/TestData/'

# When True, the cells below print extra diagnostics.
metadata = False
# Number of x samples stitched into the window built around each y label.
samples = 120


# ----------------------------- Derived state ------------------------------
# Half-width of the window on either side of its centre sample.
window = samples // 2
# Accumulators for the training set; each x sample contributes 6 values.
xdata = np.empty((0, 6 * samples))
ydata = np.empty(0)
# info[subject][trial] -> number of rows, so the data can be split back out.
info = dict()


# --------------------------------- Checks ---------------------------------
if metadata:
    for data_dir in (path_training, path_test):
        print(os.path.exists(data_dir))
    print(f'Samples: {samples}')

### Training Data

In [None]:
print('Processing Training Data...')

# Build the training set from fresh accumulators so that re-running this cell
# rebuilds it instead of appending a second copy. The leading empty float
# arrays keep the result well-defined (and float64, as before) even when no
# file matches.
x_parts = [np.empty((0, samples * 6))]
y_parts = [np.empty((0,))]

prefix_len = len('subject_')
# Label number `count` is centred on x sample `1 + 4 * count`
# (presumably x is recorded at four times the label rate -- TODO confirm).
first_center = 1
label_stride = 4
# Offsets of the window samples relative to the centre sample.
offsets = np.arange(-window, window)

for file in sorted(os.listdir(path_training)):
    if not file.endswith('x.csv'):
        continue
    print(f'Processing: {file}')

    # The three characters after 'subject_' are the subject number and the
    # two characters after the following separator are the trial number.
    subject = int(file[prefix_len:prefix_len + 3])
    trial = int(file[prefix_len + 4:prefix_len + 6])
    info.setdefault(subject, {})

    # Read data; x is smoothed with a 3-sample trailing rolling mean.
    xin = pd.read_csv(path_training + file, header=None).rolling(window=3, min_periods=1).mean()
    yin = pd.read_csv(path_training + file.replace('x.csv', 'y.csv'), header=None)

    rows = yin.shape[0]
    x_values = xin.to_numpy(dtype=float)
    n_obs, n_channels = x_values.shape

    # idx[r, s] is the x row feeding position s of window r. Positions that
    # fall before the start or past the end of the recording stay zero.
    # This replaces a per-element pandas slice wrapped in a bare `except`,
    # which was very slow and silently hid any unrelated error.
    centers = first_center + label_stride * np.arange(rows)
    idx = centers[:, None] + offsets[None, :]
    in_range = (idx >= 0) & (idx < n_obs)

    stitched = np.zeros((rows, samples, n_channels))
    stitched[:, :offsets.size][in_range] = x_values[idx[in_range]]

    # Flatten each window to [t0c0, t0c1, ..., t0c5, t1c0, ...].
    xout = stitched.reshape(rows, samples * n_channels)
    yout = yin.iloc[:, 0].to_numpy().astype(int)
    count = rows

    x_parts.append(xout)
    y_parts.append(yout)

    info[subject][trial] = count

    print(f'Completed:  {count} rows\n')

# A single concatenation avoids re-copying the growing array for every file.
xdata = np.concatenate(x_parts, axis=0)
ydata = np.concatenate(y_parts, axis=0)


# Training Data to Dataframes
# ----------------------------------------------------------
xtrain = pd.DataFrame(xdata)
ytrain = pd.DataFrame(ydata, columns=['Label'])


if metadata:
    print(f'xdata:    {xdata.shape}')
    print(f'xtrain:   {xtrain.shape}')
    print(f'ydata:    {ydata.shape}')
    print(f'ytrain:   {ytrain.shape}')
    print(f'info:     {info}')
    print(f'first 10x: \n{xtrain[:10]}')
    print(f'last  10x: \n{xtrain[-10:]}')
    print(f'first 10y: \n{ytrain[:10]}')
    print(f'last  10y: \n{ytrain[-10:]}')

### Test Data

In [None]:
print('\nProcessing Test Data...')

# One feature frame and one placeholder-label frame per test subject.
# NOTE(review): both dicts are keyed by subject only, so a second trial for
# the same subject would overwrite the first -- confirm one trial per subject.
xtest = {}
ytest = {}

prefix_len = len('subject_')
# Label number `count` is centred on x sample `1 + 4 * count`
# (presumably x is recorded at four times the label rate -- TODO confirm).
first_center = 1
label_stride = 4
# Offsets of the window samples relative to the centre sample.
offsets = np.arange(-window, window)

for file in sorted(os.listdir(path_test)):
    if not file.endswith('x.csv'):
        continue
    print(f'Processing: {file}')

    # The three characters after 'subject_' are the subject number and the
    # two characters after the following separator are the trial number.
    subject = int(file[prefix_len:prefix_len + 3])
    trial = int(file[prefix_len + 4:prefix_len + 6])
    info.setdefault(subject, {})

    # Read data; x is smoothed with a 3-sample trailing rolling mean. The
    # y_time file is only used for the number of labels to predict.
    xin = pd.read_csv(path_test + file, header=None).rolling(window=3, min_periods=1).mean()
    yin = pd.read_csv(path_test + file.replace('x.csv', 'y_time.csv'), header=None)

    rows = yin.shape[0]
    x_values = xin.to_numpy(dtype=float)
    n_obs, n_channels = x_values.shape

    # idx[r, s] is the x row feeding position s of window r. Positions that
    # fall before the start or past the end of the recording stay zero.
    # This replaces a per-element pandas slice wrapped in a bare `except`,
    # which was very slow and silently hid any unrelated error.
    centers = first_center + label_stride * np.arange(rows)
    idx = centers[:, None] + offsets[None, :]
    in_range = (idx >= 0) & (idx < n_obs)

    stitched = np.zeros((rows, samples, n_channels))
    stitched[:, :offsets.size][in_range] = x_values[idx[in_range]]

    # Flatten each window to [t0c0, t0c1, ..., t0c5, t1c0, ...].
    xout = stitched.reshape(rows, samples * n_channels)
    # Test labels are unknown; -1 marks "no label".
    yout = np.full(rows, -1, dtype=int)
    count = rows

    xtest[subject] = pd.DataFrame(xout)
    ytest[subject] = pd.DataFrame(yout, columns=['Label'])

    info[subject][trial] = count

    print(f'Completed:  {count} rows\n')

print('xtest data')
for k in xtest.keys():
    print(f'subject: {k}\tshape: {xtest[k].shape}')

print('\nytest data')
for k in ytest.keys():
    print(f'subject: {k}\tshape: {ytest[k].shape}')


if metadata:
    print(f'info:     {info}')
    # The "first 10x" print previously showed 100 rows; it now matches its label.
    print(f'first 10x: \n{xtest[9][:10]}')
    print(f'last  10x: \n{xtest[9][-10:]}')
    print(f'first 10y: \n{ytest[9][:10]}')
    print(f'last  10y: \n{ytest[9][-10:]}')

### Pickle

This allows temporary storage of the train and test dataframes to bypass the lengthy loading operation.

#### Storage

In [None]:
import pickle

# Cache the processed frames on Drive. Each file is opened in a `with` block
# so the handle is flushed and closed before the next one is written.
processed_dir = 'drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/ProcessedData/'
for cache_name, cache_obj in (
    ('xtrain', xtrain),
    ('ytrain', ytrain),
    ('xtest', xtest),
    ('ytest', ytest),
):
    with open(f'{processed_dir}{cache_name}_window3000ms.pkl', 'wb') as cache_file:
        pickle.dump(cache_obj, cache_file)

#### Reading

In [None]:
import pickle


def _read_pickle(path):
    """Load one pickled object from `path`, closing the file afterwards.

    NOTE: unpickling executes code embedded in the file, so only load caches
    written by this notebook.
    """
    with open(path, 'rb') as cache_file:
        return pickle.load(cache_file)


xtrain = _read_pickle('drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/ProcessedData/xtrain_window3000ms.pkl')
ytrain = _read_pickle('drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/ProcessedData/ytrain_window3000ms.pkl')
xtest = _read_pickle('drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/ProcessedData/xtest_window3000ms.pkl')
ytest = _read_pickle('drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/ProcessedData/ytest_window3000ms.pkl')

### Train-val split (*among* training data)

In [None]:
from sklearn.model_selection import train_test_split

# Attach the label column to the feature columns (joined on the row index),
# then hold out the final 20% of rows. With shuffle=False the split is a
# plain head/tail cut, so `df_test` here is a held-out slice of the TRAINING
# data, not the unlabeled test set stored in `xtest`.
df_train_pre_split = pd.merge(xtrain, ytrain, left_index=True, right_index=True)
df_train, df_test = train_test_split(
    df_train_pre_split,
    test_size=0.2,
    shuffle=False,
    random_state=42,
)
# Drop the merged copy; only the two splits are needed from here on.
del df_train_pre_split

### Data to tensors

In [None]:
import torch
import torch.nn.functional as F
import numpy as np

# Set up gpu, if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def _windows_to_tensor(frame, n_channels=6):
    """Convert flattened windows into a float32 tensor of shape (rows, channels, time).

    `frame` holds one window per row. Its last column is the label and is
    skipped; the remaining columns are laid out time-major
    ([t0c0, t0c1, ..., t0c5, t1c0, ...]), so feature column `t * n_channels + c`
    is channel `c` at time step `t`.

    The whole feature block is converted in one call. Passing torch.tensor a
    Python list of per-channel ndarrays takes a much slower path (PyTorch
    warns about it) and yields the same values.
    """
    features = frame.iloc[:, :-1].to_numpy()
    n_rows, n_values = features.shape
    flat = torch.tensor(features, dtype=torch.float32)
    return flat.reshape(n_rows, n_values // n_channels, n_channels).permute(0, 2, 1).contiguous()


# After setup:
#   Dimension 0: length of training set
#   Dimension 1: 6 channels (2 sensors with X,Y,Z)
#   Dimension 2: number of measurements in time window
xtrain_t = _windows_to_tensor(df_train).to(device)

# Just the labels
ytrain_t = torch.tensor(np.array(df_train['Label'])).long().squeeze().to(device)

# Same layout as above, for the held-out slice of the training data.
xtest_t = _windows_to_tensor(df_test).to(device)

# Just the labels
ytest_t = torch.tensor(np.array(df_test['Label'])).long().squeeze().to(device)



# Model (CNN)

### Network Architecture

We parameterize several architectural components of the network, in order to support both parameter *and* hyperparameter optimization via cross-validation.

In [None]:
import torch.nn as nn
class Net(nn.Module):
    """1-D CNN mapping a 6-channel window to 4 class scores.

    Two convolution stages (Conv1d -> ReLU -> AvgPool1d) are followed by two
    linear layers. The first linear layer is lazy, so its input width is
    inferred from the first batch that passes through the network.

    Args:
        kernel_size_1: kernel width of the first convolution.
        n_filters_1: number of output channels of the first convolution.
        kernel_size_2: kernel width of the second convolution.
        n_filters_2: number of output channels of the second convolution.
    """

    def __init__(self, kernel_size_1, n_filters_1, kernel_size_2, n_filters_2):
        super().__init__()

        # Stage 1: the pooling uses stride 1, so it only trims two time steps.
        first_conv = nn.Conv1d(6, n_filters_1, kernel_size_1, stride=1)
        self.conv1 = nn.Sequential(first_conv, nn.ReLU(), nn.AvgPool1d(3, stride=1))

        # Stage 2: the pooling uses stride 3, reducing the time axis roughly threefold.
        second_conv = nn.Conv1d(n_filters_1, n_filters_2, kernel_size_2, stride=1)
        self.conv2 = nn.Sequential(second_conv, nn.ReLU(), nn.AvgPool1d(3, stride=3))

        # Classifier head.
        # NOTE(review): there is no activation between the two linear layers.
        self.fc = nn.Sequential(nn.LazyLinear(128), nn.Linear(128, 4))

    def forward(self, x):
        """Return class scores of shape (batch, 4) for `x` of shape (batch, 6, time)."""
        features = self.conv2(self.conv1(x))
        # Collapse (filters, time) into one feature vector per sample.
        return self.fc(features.flatten(start_dim=1))

### Evaluation and Tuning

#### Cross-Validation

In [None]:
import itertools
import warnings

import torch.optim as optim
from sklearn.metrics import f1_score, accuracy_score
from torch.utils.data import DataLoader, WeightedRandomSampler, TensorDataset

# Silences all warnings for the rest of the session.
warnings.filterwarnings('ignore')


folds = 5
fold_size = (len(xtrain_t) // folds)

result_columns = ['i', 'fold', 'epoch', 'batch_size', 'kernel_size_1', 'n_filters_1', 'kernel_size_2', 'n_filters_2', 'loss', 'micro_f1', 'macro_f1', 'accuracy']
# Result rows are collected in a list and turned into a frame once per fold;
# growing a DataFrame row by row is quadratic and DataFrame.append no longer
# exists in pandas 2.
validation_rows = []
validation_df = pd.DataFrame(columns=result_columns)

# Hyperparameters
n_epochs = 16
batch_sizes = [32, 64]
kernel_size_1s = [5, 7, 9]
n_filters_1s = [6, 12]
kernel_size_2s = [5, 7, 9]
n_filters_2s = [6, 12]

# product() varies the last factor fastest, i.e. the same order (and the same
# numbering `i`) as five nested loops with batch size outermost.
grid = list(itertools.product(batch_sizes, kernel_size_1s, n_filters_1s, kernel_size_2s, n_filters_2s))
combinations = len(grid)

for i, (batch_size, kernel_size_1, n_filters_1, kernel_size_2, n_filters_2) in enumerate(grid, start=1):
    print("Starting hyperparameter set {i}/{combinations}".format(i=i, combinations=combinations))

    for fold in range(1, folds + 1):
        # Contiguous validation block; the last fold absorbs the remainder so
        # exactly `folds` folds are produced whatever the dataset length.
        val_start_index = (fold - 1) * fold_size
        val_stop_index = len(ytrain_t) if fold == folds else val_start_index + fold_size

        val_fold_x = xtrain_t[val_start_index:val_stop_index]
        val_fold_y = ytrain_t[val_start_index:val_stop_index]
        # torch.cat accepts empty slices, so the first and last folds need no special case.
        train_fold_x = torch.cat((xtrain_t[:val_start_index], xtrain_t[val_stop_index:]))
        train_fold_y = torch.cat((ytrain_t[:val_start_index], ytrain_t[val_stop_index:]))

        train_data = TensorDataset(train_fold_x, train_fold_y)

        # Class-balanced sampling over the TRAINING fold: every sample is
        # weighted by the inverse frequency of its own label. The weights
        # must line up one-to-one with `train_data`; weights built from the
        # validation labels restrict sampling to the first `fold_size`
        # training rows. bincount is ordered by label value, whereas
        # value_counts() is ordered by frequency and cannot be indexed by label.
        class_counts = torch.bincount(train_fold_y)
        sample_weights = (1.0 / class_counts.double())[train_fold_y].cpu()
        sampler = WeightedRandomSampler(sample_weights, len(train_fold_y), replacement=True)
        train_loader = DataLoader(train_data, batch_size=batch_size, sampler=sampler)

        # Instantiate neural net
        net = Net(kernel_size_1, n_filters_1, kernel_size_2, n_filters_2).to(device)
        # Dry run so the lazy linear layer is materialized before its
        # parameters are handed to the optimizer.
        with torch.no_grad():
            net(train_fold_x[:1])

        # Loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, nesterov=True)

        # The validation labels do not change across epochs; move them to the CPU once.
        val_true = val_fold_y.cpu()

        for epoch in range(1, n_epochs + 1):
            # Running total for the average training loss of this epoch
            sum_loss = 0.0
            batch_count = 0

            for batch_x, batch_y in train_loader:
                batch_count += 1

                # Reset the gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = net(batch_x)

                # Calculate loss and update the weights
                loss = criterion(outputs, batch_y)
                sum_loss += loss.item()
                loss.backward()
                optimizer.step()

            this_epoch_loss = sum_loss / batch_count

            # Score the validation fold after every epoch.
            with torch.no_grad():
                preds = torch.argmax(net(val_fold_x), 1).cpu()

            validation_rows.append({
                'i': i,
                'fold': fold,
                'epoch': epoch,
                'batch_size': batch_size,
                'kernel_size_1': kernel_size_1,
                'n_filters_1': n_filters_1,
                'kernel_size_2': kernel_size_2,
                'n_filters_2': n_filters_2,
                # 'loss' is the average TRAINING loss of the epoch.
                'loss': this_epoch_loss,
                'accuracy': accuracy_score(y_true=val_true, y_pred=preds),
                'micro_f1': f1_score(y_true=val_true, y_pred=preds, average='micro'),
                'macro_f1': f1_score(y_true=val_true, y_pred=preds, average='macro'),
            })

        # Checkpoint the results after every fold so a disconnect loses little work.
        validation_df = pd.DataFrame(validation_rows, columns=result_columns)
        validation_df.to_csv('drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/validation_table.csv')
        print("\tFinished fold ", fold)



#### Cross-Validation Results
The best performance for both accuracy and macro_f1 was achieved by hyperparameter combination #71 (`i == 71`).

In [None]:
# Reload the cross-validation results saved by the grid search. The table was
# written with its index, which comes back as an extra 'Unnamed: 0' column.
validation_csv_path = 'drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/validation_table.csv'
validation_df = pd.read_csv(validation_csv_path)

In [None]:
# Accuracy and Macro F1 for each model
import numpy as np
import plotly.express as px

# Average each hyperparameter set's scores over folds and epochs, ignoring
# the first four epochs of every run.
late_epochs = validation_df[validation_df['epoch'] > 4]
scores_by_model = late_epochs.groupby('i')

agg_group = pd.DataFrame(scores_by_model['macro_f1'].mean())
agg_group['i'] = agg_group.index
agg_group['accuracy'] = scores_by_model['accuracy'].mean()

# One bar chart per metric, each with its own y-range.
for metric, y_range in (('accuracy', (.7, 1)), ('macro_f1', (.6, 1))):
    fig = px.bar(agg_group, x="i", y=metric, height=400, range_y=y_range)
    fig.show()

Below are the hyperparameters and architecture of our most successful model. This model (retrained) will be the source of our predictions.

In [None]:
# Winning model: show the first logged row of combination 71, which carries
# its hyperparameter values.
is_winner = validation_df['i'] == 71
validation_df[is_winner].head(1)

Unnamed: 0.1,Unnamed: 0,i,fold,epoch,batch_size,kernel_size_1,n_filters_1,kernel_size_2,n_filters_2,loss,micro_f1,macro_f1,accuracy
5600,5600,71.0,1.0,1.0,64.0,9.0,12.0,9.0,6.0,0.325154,0.890899,0.859396,0.890899


In [None]:
# Accuracy and macro F1 per epoch, averaged over every model and fold
scores_by_epoch = validation_df.groupby('epoch')

agg_epoch = pd.DataFrame(scores_by_epoch['macro_f1'].mean())
agg_epoch['epoch'] = agg_epoch.index
agg_epoch['accuracy'] = scores_by_epoch['accuracy'].mean()

# One bar chart per metric, each with its own y-range.
for metric, y_range in (('accuracy', (.75, 1)), ('macro_f1', (.6, 1))):
    fig = px.bar(agg_epoch, x="epoch", y=metric, height=400, range_y=y_range)
    fig.show()

In [None]:
# Learning curve of the winning model (combination 71): one loss line per fold
winning_rows = validation_df[validation_df['i'] == 71]
px.line(winning_rows, x='epoch', y='loss', line_group='fold', color='fold')

In [None]:
# Per-epoch averages across the folds of the winning model (combination 71).
winning_rows = validation_df[validation_df['i'] == 71]
group_df = winning_rows.groupby('epoch').mean()
group_df

Unnamed: 0_level_0,Unnamed: 0,i,fold,batch_size,kernel_size_1,n_filters_1,kernel_size_2,n_filters_2,loss,micro_f1,macro_f1,accuracy
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1.0,5632,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.337513,0.845332,0.704727,0.845332
2.0,5633,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.120655,0.852111,0.71508,0.852111
3.0,5634,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.087918,0.851306,0.688473,0.851306
4.0,5635,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.07331,0.85829,0.710335,0.85829
5.0,5636,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.067581,0.869906,0.736993,0.869906
6.0,5637,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.060727,0.862401,0.723303,0.862401
7.0,5638,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.057327,0.855439,0.716551,0.855439
8.0,5639,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.053388,0.830116,0.713233,0.830116
9.0,5640,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.050514,0.857567,0.724606,0.857567
10.0,5641,71.0,3.0,64.0,9.0,12.0,9.0,6.0,0.04756,0.867331,0.753714,0.867331


In [None]:
# Accuracy by epoch for winning model (fold-averaged, so `fold` is a single
# averaged value and the plot has one line)
px.line(group_df, x=group_df.index, y='accuracy', color='fold')

In [None]:
# Macro F1 by epoch for winning model (fold-averaged, so `fold` is a single
# averaged value and the plot has one line)
px.line(group_df, x=group_df.index, y='macro_f1', color='fold')

## Training and evaluation with held out test data

#### Training

In [None]:
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, f1_score
from torch.utils.data import DataLoader, WeightedRandomSampler, TensorDataset

# Hyperparameters
batch_size = 64
n_epochs = 50

# Class-balanced sampling: every training sample is weighted by the inverse
# frequency of its own label. bincount is ordered by label value, whereas
# value_counts() is ordered by frequency and cannot be indexed by label.
class_counts = torch.bincount(ytrain_t)
sample_weights = (1.0 / class_counts.double())[ytrain_t].cpu()
sampler = WeightedRandomSampler(sample_weights, ytrain_t.shape[0], replacement=True)
train_data = TensorDataset(xtrain_t, ytrain_t)
train_loader = DataLoader(train_data, batch_size=batch_size, sampler=sampler)

# Instantiate neural net
net = Net(kernel_size_1=9, n_filters_1=12, kernel_size_2=9, n_filters_2=6).to(device)
# Dry run so the lazy linear layer is materialized before its parameters are
# handed to the optimizer.
with torch.no_grad():
    net(xtrain_t[:1])

# Loss function and optimizer
# NOTE(review): the grid search used SGD while this run uses Adam -- confirm
# that the difference is intentional.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=3e-4)

# The held-out labels do not change across epochs; move them to the CPU once.
holdout_true = ytest_t.cpu()

for epoch in range(1, n_epochs + 1):
    print('Epoch {epoch}'.format(epoch=epoch))

    # Running total for the average training loss of this epoch
    sum_loss = 0.0
    batch_count = 0

    for batch_x, batch_y in train_loader:
        batch_count += 1

        # Reset the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(batch_x)

        # Calculate loss and update the weights
        loss = criterion(outputs, batch_y)
        sum_loss += loss.item()
        loss.backward()
        optimizer.step()

    this_epoch_loss = sum_loss / batch_count
    print('Average Loss: {loss}'.format(loss=this_epoch_loss))

    # Score the held-out slice after every epoch.
    with torch.no_grad():
        outputs = net(xtest_t)
        preds = torch.argmax(outputs, 1)
    print("Macro F1", f1_score(y_true=holdout_true, y_pred=preds.cpu(), average='macro'))

#### Test Results

Making predictions

In [None]:
# Score the trained network once more on the held-out slice. `outputs` is
# reused by the smoothing experiment in the next cell.
with torch.no_grad():
    outputs = net.forward(xtest_t)
    loss = criterion(outputs, ytest_t)
    preds = outputs.argmax(dim=1)
    macro_f1 = f1_score(y_true=ytest_t.cpu(), y_pred=preds.cpu(), average='macro')
    print("Macro F1", macro_f1)


Macro F1 0.8182742570117818


Here we explore smoothing the predicted labels with a rolling window, replacing each prediction with the mode of its window. We use the mode because the class labels are categorical (neither continuous nor ordinal), so a mean would not be meaningful.



In [None]:
from sklearn.metrics import f1_score
# Try rolling-mode smoothing with window sizes 1..24 and report the macro F1
# of each on the held-out slice.
# NOTE(review): this search uses a trailing window (no center=True), while the
# cell that writes the final predictions smooths with center=True -- confirm
# that the window size chosen here carries over.
raw_preds = pd.Series(torch.argmax(outputs, 1).cpu())
for window_size in range(1, 25):
    rolled = raw_preds.rolling(window=window_size, min_periods=1)
    # mode() sorts its result, so ties resolve to the smallest label.
    preds = rolled.apply(lambda labels: labels.mode()[0])
    score = f1_score(y_true=ytest_t.cpu(), y_pred=preds, average='macro')
    print(window_size, score)
# ConfusionMatrixDisplay(confusion_matrix(y_true=ytest_t.cpu(), y_pred=preds), display_labels=[0,1,2,3]).plot()

1 0.8182742570117818
2 0.8287054188067237
3 0.8227638157417743
4 0.8295761288728435
5 0.8258534131692048
6 0.8306365710476943
7 0.8276572646607463
8 0.8309264045949778
9 0.8281162922739986
10 0.8304290307095417
11 0.8270959306001151
12 0.8289127213778291
13 0.8260991686148093
14 0.8269753184985288
15 0.823667226306418
16 0.8238351153687316
17 0.8208081691316398
18 0.8205764173153891
19 0.8171312678938003
20 0.8164925232311713
21 0.8133246693431329
22 0.8120546117873704
23 0.809858507990591
24 0.8090448006098852


## Training on full training dataset

#### Full training dataset to tensors

In [None]:
# Rebuild the training frame from ALL labeled rows (no held-out slice).
df_train = pd.merge(left=xtrain, right=ytrain, left_index=True, right_index=True)

import torch
import torch.nn.functional as F
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Feature columns are laid out time-major ([t0c0, ..., t0c5, t1c0, ...]) and
# the label is the last column. Converting the whole block at once and
# reshaping to (rows, time, 6) -> (rows, 6, time) gives the same tensor as
# stacking six strided column slices, without handing torch.tensor a list of
# ndarrays (a much slower path that PyTorch warns about).
train_features = df_train.iloc[:, :-1].to_numpy()
xtrain_t = (
    torch.tensor(train_features, dtype=torch.float32)
    .reshape(train_features.shape[0], train_features.shape[1] // 6, 6)
    .permute(0, 2, 1)
    .contiguous()
    .to(device)
)
# Free the intermediate NumPy copy of the features.
del train_features

# Just the labels
ytrain_t = torch.tensor(np.array(df_train['Label'])).long().squeeze().to(device)

#### Retrain with full training dataset

In [None]:
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler, TensorDataset

# Hyperparameters
batch_size = 32
n_epochs = 5
tol = .01

# Class-balanced sampling: every training sample is weighted by the inverse
# frequency of its own label. bincount is ordered by label value, whereas
# value_counts() is ordered by frequency and cannot be indexed by label.
class_counts = torch.bincount(ytrain_t)
sample_weights = (1.0 / class_counts.double())[ytrain_t].cpu()
sampler = WeightedRandomSampler(sample_weights, ytrain_t.shape[0], replacement=True)
train_data = TensorDataset(xtrain_t, ytrain_t)
train_loader = DataLoader(train_data, batch_size=batch_size, sampler=sampler)

# Instantiate neural net
net = Net(kernel_size_1=9, n_filters_1=12, kernel_size_2=9, n_filters_2=6).to(device)
# Dry run so the lazy linear layer is materialized before its parameters are
# handed to the optimizer.
with torch.no_grad():
    net(xtrain_t[:1])

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, nesterov=True)

# Start from an infinite "previous" loss so the first epoch never triggers
# the early stop.
last_epoch_loss = float('inf')

for epoch in range(1, n_epochs + 1):
    print('Epoch {epoch}'.format(epoch=epoch))

    # Running total for the average training loss of this epoch
    sum_loss = 0.0
    batch_count = 0

    for batch_x, batch_y in train_loader:
        batch_count += 1

        # Reset the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(batch_x)

        # Calculate loss and update the weights
        loss = criterion(outputs, batch_y)
        sum_loss += loss.item()
        loss.backward()
        optimizer.step()

        # Print status every 1000 batches; the reported batch number now
        # matches the number of batches the average covers.
        if batch_count % 1000 == 0:
            print('\tbatch {batch_count}, loss: {loss}'.format(batch_count=batch_count, loss=sum_loss / batch_count))

    this_epoch_loss = sum_loss / batch_count
    print('Average Loss: {loss}'.format(loss=this_epoch_loss))

    # Finish training if loss hasn't decreased more than tol in this epoch
    if last_epoch_loss - this_epoch_loss < tol:
        print('Tol reached\nAverage Loss: {loss}'.format(loss=this_epoch_loss))
        break
    last_epoch_loss = this_epoch_loss

#### Generate and save predictions

In [None]:
# Predict every test subject with the retrained network, smooth the label
# sequence and write one CSV per subject.
for subject, df_test in xtest.items():
    # The test frames have no label column, so every column is a feature.
    # Columns are time-major ([t0c0, ..., t0c5, t1c0, ...]); reshape to
    # (rows, time, 6) and swap axes to get (rows, 6, time).
    test_features = df_test.to_numpy()
    xtest_t = (
        torch.tensor(test_features, dtype=torch.float32)
        .reshape(test_features.shape[0], test_features.shape[1] // 6, 6)
        .permute(0, 2, 1)
        .contiguous()
        .to(device)
    )
    with torch.no_grad():
        outputs = net(xtest_t)

    preds = pd.Series(torch.argmax(outputs, 1).cpu().numpy())
    # Centred rolling mode over 8 predictions; mode() sorts its result, so
    # ties resolve to the smallest label.
    smooth_df = preds.rolling(8, center=True, min_periods=1).apply(lambda x: x.mode()[0]).astype(int)
    assert smooth_df.shape == preds.shape
    print('Passed test. Overwriting')

    # Write the SMOOTHED labels only. The raw predictions used to be written
    # to this same path straight afterwards, which discarded the smoothing.
    # NOTE(review): the trial number '01' is hard-coded in the file name.
    smooth_df.to_csv('drive/MyDrive/ECE542_fa2021_Project_TerrainRecognition/Predictions/subject_0{:02d}_01__y.csv'.format(subject), header=False, index=False)