In [None]:
import sys
# Print the full interpreter version string for reference when reproducing results.
print(f'Using python version {sys.version}')

In [None]:
# Colab-specific setup: mount the user's Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Make the project data folder the working directory; later cells load the
# .npy files by relative name, so they depend on this step.
%cd '/content/drive/MyDrive/247_project_data'

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

# Print the installed PyTorch version for reference when reproducing results.
print(f'Using pytorch version {torch.__version__}')

In [None]:
# Report CUDA availability, then target the first GPU when there is one and the CPU otherwise.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    device_id = 0  # an integer index is shorthand for 'cuda:0'
else:
    device_id = 'cpu'
device = torch.device(device_id)  # handle later passed to .to(...) for the model and the tensors

## Prepare the data

In [None]:
# Load the pre-split arrays by relative name from the current working directory.
# Training/validation pool:
X_train_valid = np.load("X_train_valid.npy")
y_train_valid = np.load("y_train_valid.npy")
person_train_valid = np.load("person_train_valid.npy")
# Held-out test set:
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
person_test = np.load("person_test.npy")

# Echo the shape of every array as a quick sanity check.
print('Training/Valid data shape: {}'.format(X_train_valid.shape))
print('Test data shape: {}'.format(X_test.shape))
print('Training/Valid target shape: {}'.format(y_train_valid.shape))
print('Test target shape: {}'.format(y_test.shape))
print('Person train/valid shape: {}'.format(person_train_valid.shape))
print('Person test shape: {}'.format(person_test.shape))

In [None]:
# Display the first entry along axis 0 of the training/validation array.
X_train_valid[0]

In [None]:
# Shift the raw class codes so that they start at 0 and cast them to integers
# (the labels are later wrapped in a LongTensor and fed to the cross-entropy loss).
# Both splits are shifted by the SAME offset, taken from the training/validation
# labels, so a given raw code always maps to the same class index even if the
# test split happens not to contain the smallest code.
# Re-running this cell is harmless: the offset is 0 once the labels are zero-based.
label_offset = np.min(y_train_valid)
y_train_valid = (y_train_valid - label_offset).astype('int')
y_test = (y_test - label_offset).astype('int')

## Build the CNN

In [None]:
class ShallowConvNet(nn.Module):
    """Shallow CNN: conv along width -> conv across height -> square -> average pool -> log -> linear.

    Args:
        input_shape: (H0, W0) of a single sample. H0 is the kernel height of the
            second ("spatial") convolution; W0 is the axis the first ("temporal")
            convolution and the pooling slide along.
        n_temporal_filters: output channels of the first convolution.
        n_spatial_filters: output channels of the second convolution.
        n_classes: number of output logits.
        use_BN: if truthy, apply batch normalisation after each convolution.
        log_eps: lower bound applied to the pooled activations before the
            logarithm, so an exactly-zero activation cannot produce -inf.

    Raises:
        ValueError: if W0 is too short for the convolution followed by the pooling window.
    """

    TEMPORAL_KERNEL = 25  # width of the first convolution kernel
    POOL_KERNEL = 75      # width of the average-pooling window
    POOL_STRIDE = 15      # stride of the average-pooling window

    def __init__(self, input_shape=(22, 1000), n_temporal_filters=40, n_spatial_filters=40, n_classes=4, use_BN = False, log_eps=1e-6):
        super().__init__()

        self.input_shape = input_shape
        self.n_temporal_filters = n_temporal_filters
        self.n_spatial_filters = n_spatial_filters
        self.n_classes = n_classes
        self.use_BN = use_BN
        self.log_eps = log_eps

        # Width left after the un-padded first convolution, then after pooling.
        conv_width = input_shape[1] - self.TEMPORAL_KERNEL + 1
        if conv_width < self.POOL_KERNEL:
            raise ValueError(
                'input_shape[1] must be at least {} (got {})'.format(
                    self.TEMPORAL_KERNEL + self.POOL_KERNEL - 1, input_shape[1]))
        pooled_width = 1 + (conv_width - self.POOL_KERNEL) // self.POOL_STRIDE
        self.n_dense_features = n_spatial_filters * pooled_width

        self.temporal_convolution = nn.Conv2d(1, n_temporal_filters, (1, self.TEMPORAL_KERNEL))
        self.spatial_convolution = nn.Conv2d(n_temporal_filters, n_spatial_filters, (input_shape[0], 1))
        self.average_pool = nn.AvgPool2d((1, self.POOL_KERNEL), stride=(1, self.POOL_STRIDE))
        # The batch-norm layers are always created (keeps the state_dict layout
        # independent of use_BN); forward() only applies them when use_BN is set.
        self.BN_temporal = nn.BatchNorm2d(num_features = n_temporal_filters)
        self.BN_spatial = nn.BatchNorm2d(num_features = n_spatial_filters)

        # Explicit in_features instead of nn.LazyLinear: every parameter exists as
        # soon as the model is constructed, so an optimizer can be attached
        # without first running a forward pass.
        self.dense = nn.Linear(self.n_dense_features, self.n_classes)
        self.elu = nn.ELU()
        self.dropout = nn.Dropout()  # defined but not applied in forward()

    def forward(self, x):
        """Map x of shape (batch_size, H0, W0) to logits of shape (batch_size, n_classes)."""
        # Add the channel axis using the stored input_shape rather than x.shape,
        # so an unbatched (H0, W0) input (as passed by torchinfo) also works.
        # reshape is used because it also accepts non-contiguous tensors.
        h = x.reshape(-1, 1, self.input_shape[0], self.input_shape[1])

        # (batch_size, 1, H0, W0) -> (batch_size, n_temporal_filters, H0, W0 - 25 + 1)
        h = self.temporal_convolution(h)
        if self.use_BN:
            h = self.BN_temporal(h)
        h = self.elu(h)

        # -> (batch_size, n_spatial_filters, 1, W0 - 25 + 1)
        h = self.spatial_convolution(h)
        if self.use_BN:
            h = self.BN_spatial(h)
        h = self.elu(h)

        h = torch.pow(h, 2.0) # square

        # -> (batch_size, n_spatial_filters, 1, 1 + ((W0 - 25 + 1) - 75)//15)
        h = self.average_pool(h)

        # Natural log. The pooled squares are >= 0 and can be exactly 0, which
        # would give -inf here and NaN logits downstream, hence the clamp.
        h = torch.log(torch.clamp(h, min=self.log_eps))

        h = h.flatten(start_dim=1) # flatten the non-batch dimensions
        h = self.dense(h) # (batch_size, n_dense_features) -> (batch_size, n_classes)
        #h = self.dropout(h)

        return h


In [None]:
# torchsummary is deprecated. Use torchinfo instead (https://github.com/TylerYep/torchinfo).
!pip install torchinfo

In [None]:
from torchinfo import summary
# Print a layer-by-layer summary of the batch-norm variant for a single (22, 1000) input.
# Only uses outputs of modules.
print(summary(ShallowConvNet(use_BN = True), input_size=(22, 1000)))
# Disabled: no DeepConvNet is defined in the part of the notebook shown here.
#print(summary(DeepConvNet(), input_size=(22, 1000)))

## Build Dataset

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Index-aligned (sample, label) pairs backed by two tensors.

    NumPy inputs are converted on construction (X to 32-bit float, Y to 64-bit
    integer); anything else is stored exactly as passed in.
    """

    def __init__(self, X, Y):
        self.X = torch.FloatTensor(X) if isinstance(X, np.ndarray) else X
        self.Y = torch.LongTensor(Y) if isinstance(Y, np.ndarray) else Y

    def __len__(self):
        # The dataset size is the length of X along its first axis.
        return len(self.X)

    def __getitem__(self, index):
        sample = self.X[index]
        label = self.Y[index]
        return sample, label

In [None]:
# Split the data into training and validation sets
from sklearn.model_selection import train_test_split

# A fixed random_state makes the split repeatable. Without it the partition
# changes on every run, because the global seeds are only set in a later cell.
SPLIT_SEED = 12345
X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(
    X_train_valid, y_train_valid, test_size=0.2, random_state=SPLIT_SEED)

# torch.as_tensor accepts NumPy arrays as well as tensors, so this cell can be
# re-run after X_test / y_test have already been replaced by tensors below.
X_train = torch.as_tensor(X_train_np, dtype=torch.float32, device=device)
X_val = torch.as_tensor(X_val_np, dtype=torch.float32, device=device)
y_train = torch.as_tensor(y_train_np, dtype=torch.long, device=device)
y_val = torch.as_tensor(y_val_np, dtype=torch.long, device=device)
X_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)
y_test = torch.as_tensor(y_test, dtype=torch.long, device=device)


In [None]:
# Scratch check: keep only the first 200 positions of the last axis and show the resulting shape.
X_train_200 = X_train[:,:,:200]
X_train_200.shape

In [None]:
# Scratch check: shape of a 200-wide window of the last axis that starts at `index`.
index = 0
X_train_valid[:,:,index:index+200].shape

In [None]:
import random

# Build one (train, validation, test) DataLoader triple per window length.
# Every window starts at position 0 of the last axis and keeps `td` positions.
batch_size = 256
time_duration = list(range(100, 1100, 100))  # 100, 200, ..., 1000
time_len = X_train_valid.shape[2]
Train_dataloaders, Val_dataloaders, Test_dataloaders = [], [], []

for td in time_duration:
  # Alternative kept for reference: start the window at a random position.
  #index = random.randint(0,time_len-td)
  index = 0

  # Crop all three splits to the same window.
  train_dataset = MyDataset(X_train[:,:,index:index+td], y_train)
  val_dataset = MyDataset(X_val[:,:,index:index+td], y_val)
  Test_dataset = MyDataset(X_test[:,:,index:index+td], y_test)

  # Only the training loader shuffles its samples.
  train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
  val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
  Test_dataloader = torch.utils.data.DataLoader(Test_dataset, batch_size=batch_size, shuffle=False)

  Train_dataloaders.append(train_dataloader)
  Val_dataloaders.append(val_dataloader)
  Test_dataloaders.append(Test_dataloader)



## Train a Model

In [None]:
def plot_train(loss_hist,val_loss_hist,acc_hist,val_acc_hist):
  """Draw loss (left, log-scaled y) and accuracy (right) curves on one figure.

  In each panel the training series uses the bottom x-axis and the validation
  series, drawn in red, uses a twin x-axis on top, so the two series may have
  different lengths.
  """
  def draw_panel(position, train_series, val_series, y_label, log_y):
    # Select the panel and widen the gap between the two subplots.
    plt.subplot(1,2,position)
    plt.subplots_adjust(wspace=1)
    # Training curve on the primary (bottom) x-axis.
    plt.plot(train_series, label='training')
    if log_y:
      plt.yscale('log')
    plt.ylabel(y_label)
    plt.legend(loc='lower left')
    plt.xlabel('step (training)')
    # Validation curve on a twin x-axis sharing the same y-axis.
    plt.twiny()
    plt.plot(val_series, 'r', label='validation')
    plt.xlabel('step (validation)')
    plt.legend(loc='upper left')

  plt.figure(dpi=100)
  draw_panel(1, loss_hist, val_loss_hist, 'loss', True)
  draw_panel(2, acc_hist, val_acc_hist, 'acc', False)

In [None]:
import tqdm # for progress bar
# Seed every random number generator the notebook relies on.
# Seeding alone does not make every operation deterministic.
# See https://pytorch.org/docs/stable/notes/randomness.html
#   and https://pytorch.org/docs/stable/generated/torch.use_deterministic_algorithms.html
np.random.seed(12345) # NumPy's global generator (just in case)
random.seed(12345) # Python's built-in generator (just in case)
torch.manual_seed(12345) # PyTorch

def _evaluate(model, dataloader, loss_fn=None):
  """Run `model` over `dataloader` in eval mode without tracking gradients.

  Returns (batch_losses, accuracy): `batch_losses` holds one float per batch
  (and stays empty when `loss_fn` is None); `accuracy` is the fraction of
  samples whose highest-scoring class equals the label.
  """
  batch_losses = []
  n_samples = 0
  n_correct = 0
  model.eval() # relevant for dropout, batchnorm, layernorm, etc.
  with torch.no_grad():
    for x, y in dataloader:
      # No-op when the batch already lives on `device`.
      x = x.to(device)
      y = y.to(device)
      out = model(x)
      if loss_fn is not None:
        batch_losses.append(loss_fn(out, y).item())
      n_samples += len(y)
      n_correct += (out.max(1)[1] == y).sum().item()
  return batch_losses, n_correct / n_samples


def train_CNN(td,train_dataloader,val_dataloader,test_dataloader, learning_rate = 0.0005, weight_decay = 0.05, num_epochs = 50, use_BN = True):
  """Train a fresh ShallowConvNet on windows of length `td` and evaluate it.

  The model is built for inputs of shape (22, td) on the module-level `device`
  and optimised with Adam and cross-entropy loss. After every epoch the
  training and validation accuracies are measured; once training ends the test
  set is evaluated, the test accuracy is printed and the curves are plotted
  with plot_train.

  Args:
    td: window length, i.e. the size of the last input axis.
    train_dataloader, val_dataloader, test_dataloader: iterables of (x, y) batches.
    learning_rate, weight_decay: passed to torch.optim.Adam.
    num_epochs: number of passes over train_dataloader.
    use_BN: forwarded to ShallowConvNet.

  Returns:
    (loss_hist, val_loss_hist, test_loss_hist, acc_hist, val_acc_hist, test_acc_hist):
    loss_hist has one entry per training batch; val_loss_hist one entry per
    validation batch per epoch; test_loss_hist one entry per test batch;
    acc_hist and val_acc_hist one entry per epoch; test_acc_hist is a
    single-element list.
  """
  input_size = (22,td)
  model = ShallowConvNet(input_shape=input_size,use_BN = use_BN).to(device)

  # Lazily-shaped layers only get real parameters on their first forward pass.
  # If the model contains any, materialise them with a dry run before the
  # optimizer is attached. Eval mode + no_grad keep batch-norm statistics untouched.
  if any(isinstance(p, nn.parameter.UninitializedParameter) for p in model.parameters()):
    model.eval()
    with torch.no_grad():
      model(torch.zeros(1, *input_size, device=device))

  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay = weight_decay)
  celoss = nn.CrossEntropyLoss()
  loss_hist = []
  val_loss_hist = []
  acc_hist = []
  val_acc_hist = []

  # progress bar
  pbar = tqdm.tqdm(
      range(num_epochs), position=0, leave=True,
      bar_format='{l_bar}{bar:30}{r_bar}',
  )
  for epoch in pbar:
    model.train() # set model to training mode.
    for x, y in train_dataloader:
      x = x.to(device)
      y = y.to(device)

      optimizer.zero_grad() # clear gradients of parameters that optimizer is optimizing
      out = model(x) # shape (batch_size, n_classes)
      loss = celoss(out, y) # calculate the cross entropy loss

      loss.backward() # backpropagate
      optimizer.step() # perform optimization step

      # IMPORTANT: store the scalar via .item(), never `loss` itself: the tensor
      # references its entire computational graph and would exhaust memory.
      loss_hist.append(loss.item())

    # Accuracy on the training and validation sets after this epoch.
    _, train_acc = _evaluate(model, train_dataloader)
    acc_hist.append(train_acc)

    val_losses, val_acc = _evaluate(model, val_dataloader, celoss)
    val_loss_hist.extend(val_losses)
    val_acc_hist.append(val_acc)

    # update progress bar postfix
    pbar.set_postfix({'acc': acc_hist[-1], 'val_acc': val_acc_hist[-1]})

  # Final evaluation on the held-out test set.
  test_loss_hist, test_acc = _evaluate(model, test_dataloader, celoss)
  test_acc_hist = [test_acc]
  print('Test ACC:',test_acc_hist[-1])
  plot_train(loss_hist,val_loss_hist,acc_hist,val_acc_hist)

  return   loss_hist, val_loss_hist, test_loss_hist, acc_hist, val_acc_hist, test_acc_hist

In [None]:
# Train one model per window length and collect the histories of every run.
loss_hists, val_loss_hists, test_loss_hists = [], [], []
acc_hists, val_acc_hists, test_acc_hists = [], [], []

for index,td in enumerate(time_duration):
  # The dataloader lists are aligned with time_duration, so `index` selects
  # the loaders that were cropped to this window length.
  (loss_hist, val_loss_hist, test_loss_hist,
   acc_hist, val_acc_hist, test_acc_hist) = train_CNN(
      td,
      Train_dataloaders[index],
      Val_dataloaders[index],
      Test_dataloaders[index],
  )

  # Training-side histories.
  loss_hists.append(loss_hist)
  acc_hists.append(acc_hist)
  # Validation-side histories.
  val_loss_hists.append(val_loss_hist)
  val_acc_hists.append(val_acc_hist)
  # Test-side results.
  test_loss_hists.append(test_loss_hist)
  test_acc_hists.append(test_acc_hist)


In [None]:
# Final test accuracy of each trained model against the window length it was trained on.
plt.plot(time_duration, test_acc_hists, label='test')
plt.legend(loc='upper left')
# Axis decoration.
plt.xticks(range(100,1100,100))
plt.xlabel('Time')
plt.ylabel('Test Acc')
plt.title('Test Accuracy on different Time Periods')
plt.grid()


## Save the model

In [None]:
# Saving is currently disabled; the snippet below is kept for reference.
# NOTE(review): `model` and `optimizer` are local variables of train_CNN and are
# not part of its return value, so train_CNN would have to return (or save) them
# before this code can be re-enabled.

# # Best to use .pt for file extensions
# # I used to think .pth was used, but it turns out it can have potential conflicts with python
# #   (See https://stackoverflow.com/questions/59095824/what-is-the-difference-between-pt-pth-and-pwf-extentions-in-pytorch)

# # Save only the model weights
# #torch.save(model.state_dict(), 'shallowconvnet_weightsonly.pt')

# # Save an entire checkpoint
# checkpoint = {
#     'model': model.state_dict(),
#     'optimizer': optimizer.state_dict(),
#     'loss_hist': loss_hist,
#     'acc_hist': acc_hist,
#     'val_loss_hist': val_loss_hist,
#     'val_acc_hist': val_acc_hist,
# }
# torch.save(checkpoint, 'shallowconvnet_checkpoint.pt')
# # chkpt = torch.load('shallowconvnet_checkpoint.pt')
# # model.load_state_dict(chkpt['model'])