In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt

# Process dataset
Extract data from the txt file for each user. We only extract the data for the time frame where the user is doing one action, not across different actions. Then we segment the data into 2 seconds window.

In [3]:
# from scipy import stats

# def extract_features(data):
#     fft = []
#     for i in range(data.shape[0]):
#         fft.extend(np.float32(np.abs(np.fft.fft(data[i]))[:window_size//2] / window_size))
#     return np.float32(fft)

class MotionDataset(torch.utils.data.Dataset):
    
    def __init__(self, data_directory, window_size=100, is_train=True, transform=None):
        self.data_directory = data_directory
        self.window_size = window_size
        self.is_train = is_train
        self.dataset_x = []
        self.dataset_y = []
        self._initialize_dataset()

    def _initialize_dataset(self):
        raw_labels = open(self.data_directory + "/labels.txt", "r")
        self.dataset_x = []
        self.dataset_y = []
        current_exp, current_user = 0, 0
        acc_current_file_lines = []
        gyro_current_file_lines = []
        for line in raw_labels:
            # get the experiment and user data
            line_split = list(map(int,line.split()))

            # filter out the transition movements
            if line_split[2] >= 4:
                continue

            # open new file if the current experiment and user are different from the previous run
            if line_split[0] != current_exp or line_split[1] != current_user:
                current_exp, current_user = line_split[0], line_split[1]

                # Get the accelerometer data
                current_file = open(self.data_directory + "/acc_exp" + str.zfill(str(current_exp), 2) + "_user" + str.zfill(str(current_user), 2) + ".txt", "r")
                # print("Opened file: " + current_file.name)
                # process lines
                acc_current_file_lines = []
                for file_line in current_file:
                    acc_current_file_lines.append(list(map(float, file_line.split())))
                current_file.close()

                # Get the gyro data
                current_file = open(self.data_directory + "/gyro_exp" + str.zfill(str(current_exp), 2) + "_user" + str.zfill(str(current_user), 2) + ".txt", "r")
                # print("Opened file: " + current_file.name)
                gyro_current_file_lines = []
                for file_line in current_file:
                    gyro_current_file_lines.append(list(map(float, file_line.split())))
                current_file.close()
            
            # get the label, start and end indices
            label, start, end = line_split[2:5]
            label -= 1 # convert to 0-indexed

            # sliding window
            for i in range(start, end - window_size, window_size):
                # calculate fft for the window
                acc_window = acc_current_file_lines[i:i+window_size]
                gyro_window = gyro_current_file_lines[i:i+window_size]
                inputs = np.concatenate((acc_window, gyro_window), axis=1)
                inputs = np.float32(np.transpose(inputs))
                self.dataset_x.append(inputs)
                self.dataset_y.append(label)

        raw_labels.close()

        print("Dataset initialized with size: " + str(len(self.dataset_y)))
        print("Number of data for class 1: " + str(self.dataset_y.count(0)))
        print("Number of data for class 2: " + str(self.dataset_y.count(1)))
        print("Number of data for class 3: " + str(self.dataset_y.count(2)))

    def __len__(self):
        return len(self.dataset_y)

    def __getitem__(self, idx):
        return self.dataset_x[idx], self.dataset_y[idx]

window_size = 100   # 50Hz, 100 samples = 2s of movement
dataset = MotionDataset("HAPT Data Set/RawData", window_size)
print("shape of data: " + str(dataset[0][0].shape))

Dataset initialized with size: 3218
Number of data for class 1: 1161
Number of data for class 2: 1078
Number of data for class 3: 979
shape of data: (6, 100)


In [4]:
# Sample data
dataset[0][0].shape

(6, 100)

In [5]:
# Setup training and testing data
batch_size = 32
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)


In [11]:
device = torch.device("mps")
print(device)

mps


# Setup the model

In [53]:
input_size = dataset[0][0].shape
print(input_size)

net = torch.nn.Sequential(
    torch.nn.Conv1d(6, 64, kernel_size=6, stride=1, padding=0),
    torch.nn.ReLU(),
    torch.nn.Conv1d(64, 32, kernel_size=6, stride=1, padding=0),
    torch.nn.ReLU(),
    torch.nn.Flatten(),
    torch.nn.LazyLinear(3),
    torch.nn.Softmax()
)

net.to(device)

(6, 100)


Sequential(
  (0): Conv1d(6, 64, kernel_size=(6,), stride=(1,))
  (1): ReLU()
  (2): Conv1d(64, 32, kernel_size=(6,), stride=(1,))
  (3): ReLU()
  (4): Flatten(start_dim=1, end_dim=-1)
  (5): LazyLinear(in_features=0, out_features=3, bias=True)
  (6): Softmax(dim=None)
)

In [54]:
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(net.parameters(), lr=0.001, amsgrad=True)
# optimizer = optim.SGD(net.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5, verbose=True)

In [55]:
min_valid_loss = np.inf
epochs = 100

In [56]:
net.train()
def get_num_correct(preds, labels):
    return preds.round().eq(labels).sum().item()

for epoch in range(epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    total_training_loss = 0.0
    batch_count = 0
    for i, data in enumerate(train_loader, 0):
        batch_count += 1
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        total_training_loss += loss.item()
        vaccuracy = 0
            
    valid_loss = 0.0
    net.eval()     # Optional when not using Model Specific layer
    vcount = 0
    vcorrect = 0
    for j, vdata in enumerate(test_loader, 0):
        vcount += 1
        vdata, vlabels = vdata[0].to(device), vdata[1].to(device)
        # Forward Pass
        target = net(vdata)
        # Find the Loss
        loss = criterion(target,vlabels)
        # Calculate Loss
        valid_loss += loss.item()
        # Calculate accuracy
        vcorrect += (target.argmax(1) == vlabels).sum().item()
    valid_loss = valid_loss / vcount
    vaccuracy = vcorrect / len(test_loader.dataset)
    net.train()
    print(f'[Epoch:{epoch + 1:2d}] \t Training Loss: {running_loss / batch_count:5f} \t Validation Loss: {valid_loss:5f} \t Accuracy: {vaccuracy:5f}')
    
    # save model if loss improved, and obtain predictions
    if (valid_loss < min_valid_loss):
        min_valid_loss = valid_loss
        torch.save(net, "model.pt")
        print("evaluation loss reduced, model saved")
    scheduler.step(vaccuracy)

print('Finished Training')

[Epoch: 1] 	 Training Loss: 0.858379 	 Validation Loss: 0.775911 	 Accuracy: 0.770186
evaluation loss reduced, model saved
[Epoch: 2] 	 Training Loss: 0.729318 	 Validation Loss: 0.694526 	 Accuracy: 0.850932
evaluation loss reduced, model saved
[Epoch: 3] 	 Training Loss: 0.672306 	 Validation Loss: 0.638002 	 Accuracy: 0.923913
evaluation loss reduced, model saved
[Epoch: 4] 	 Training Loss: 0.625124 	 Validation Loss: 0.613664 	 Accuracy: 0.944099
evaluation loss reduced, model saved
[Epoch: 5] 	 Training Loss: 0.602788 	 Validation Loss: 0.601747 	 Accuracy: 0.947205
evaluation loss reduced, model saved
[Epoch: 6] 	 Training Loss: 0.585867 	 Validation Loss: 0.581793 	 Accuracy: 0.978261
evaluation loss reduced, model saved
[Epoch: 7] 	 Training Loss: 0.582256 	 Validation Loss: 0.587110 	 Accuracy: 0.967391
[Epoch: 8] 	 Training Loss: 0.571631 	 Validation Loss: 0.573777 	 Accuracy: 0.976708
evaluation loss reduced, model saved
[Epoch: 9] 	 Training Loss: 0.570180 	 Validation Los