# UCI HAR Dataset - Activity Recognition
## Using PyTorch for Deep Learning & Machine Learning
This notebook explores human activity recognition using the UCI HAR dataset. We implement:
- **LSTM & CNN using PyTorch**
- **Feature extraction using TSFEL**
- **Machine Learning models (Random Forest, SVM, Logistic Regression)**
- **Performance comparison between ML & DL approaches**

In [5]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import tsfel
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [6]:
def _load_split(split_dir, folders, classes, start, stop):
    """Read every recording below ``split_dir`` into ``(samples, labels)`` arrays.

    Each activity folder listed in ``folders`` is scanned; rows
    ``start:stop`` of every CSV become one sample and the label is looked
    up in ``classes`` by folder name.
    """
    samples = []
    labels = []
    for folder in folders:
        folder_dir = os.path.join(split_dir, folder)
        # os.listdir() returns names in arbitrary order; sorting keeps the
        # sample order -- and therefore the seeded split -- reproducible.
        for file_name in sorted(os.listdir(folder_dir)):
            df = pd.read_csv(os.path.join(folder_dir, file_name), sep=",", header=0)
            samples.append(df[start:stop].values)
            labels.append(classes[folder])
    return np.array(samples), np.array(labels)


def load_data(combined_dir="Combined", time=10, offset=100, seed=4, test_size=0.3):
    """Load all recordings, pool them, and return a stratified train/test split.

    The on-disk ``Train`` and ``Test`` folders are read and concatenated,
    then re-split with ``train_test_split`` so the returned partition depends
    only on ``seed`` and ``test_size``, not on the folder the file came from.

    Args:
        combined_dir: Directory containing the ``Train`` and ``Test`` folders.
        time: Window length; ``time * 50`` rows are kept from each file.
        offset: Number of leading rows skipped in each file.
        seed: ``random_state`` passed to ``train_test_split``.
        test_size: Fraction of the pooled samples placed in the test split.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. Labels are the integers 1-6
        defined in the ``classes`` mapping below.
    """
    folders = ["LAYING", "SITTING", "STANDING", "WALKING", "WALKING_DOWNSTAIRS", "WALKING_UPSTAIRS"]
    classes = {"WALKING": 1, "WALKING_UPSTAIRS": 2, "WALKING_DOWNSTAIRS": 3, "SITTING": 4, "STANDING": 5, "LAYING": 6}

    # 50 rows per unit of `time` (presumably a 50 Hz sampling rate -- confirm).
    start = offset
    stop = offset + time * 50

    X_parts = []
    y_parts = []
    for split_name in ("Train", "Test"):
        X_split, y_split = _load_split(
            os.path.join(combined_dir, split_name), folders, classes, start, stop
        )
        X_parts.append(X_split)
        y_parts.append(y_split)

    X = np.concatenate(X_parts)
    y = np.concatenate(y_parts)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed, stratify=y
    )

    print("Training data shape: ", X_train.shape)
    print("Testing data shape: ", X_test.shape)

    return X_train, X_test, y_train, y_test

In [7]:
# Activity name -> integer label; mirrors the mapping used inside load_data().
classes = {
    "WALKING": 1,
    "WALKING_UPSTAIRS": 2,
    "WALKING_DOWNSTAIRS": 3,
    "SITTING": 4,
    "STANDING": 5,
    "LAYING": 6,
}

# Read the recordings from disk and unpack the train/test arrays.
(X_train, X_test, y_train, y_test) = load_data()

Training data shape:  (126, 500, 3)
Testing data shape:  (54, 500, 3)


In [None]:
# Extract Features using TSFEL
# Statistical-domain feature configuration.
cfg = tsfel.get_features_by_domain('statistical')

# load_data() keeps `time * 50` rows per recording, so 50 samples per second
# is assumed here -- confirm against the dataset before relying on it.
SAMPLING_RATE_HZ = 50

# Hand TSFEL one 2-D window per recording (a list) instead of a single 3-D
# array, so each recording produces its own feature row, and pass `fs`
# explicitly rather than letting TSFEL fall back to its default sampling rate.
X_train_tsfel = tsfel.time_series_features_extractor(cfg, list(X_train), fs=SAMPLING_RATE_HZ)
X_test_tsfel = tsfel.time_series_features_extractor(cfg, list(X_test), fs=SAMPLING_RATE_HZ)

print('Feature Extraction Completed!')

  X_train_tsfel = tsfel.time_series_features_extractor(cfg, X_train)


In [None]:
# Train Machine Learning Models
def train_ml_model(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and print its test-set metrics.

    Prints the classification report first, then the accuracy formatted to
    four decimal places. Nothing is returned.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    report = classification_report(y_test, predictions)
    print(report)

    accuracy = accuracy_score(y_test, predictions)
    print(f'Accuracy: {accuracy:.4f}')

# Evaluate each classical model on the TSFEL features, in this fixed order:
# Random Forest, SVM, Logistic Regression.
ml_estimators = (
    ('Random Forest:', RandomForestClassifier),
    ('SVM:', SVC),
    ('Logistic Regression:', LogisticRegression),
)

for heading, estimator_cls in ml_estimators:
    print(heading)
    # Each estimator is created with its default settings right before use.
    train_ml_model(estimator_cls(), X_train_tsfel, y_train, X_test_tsfel, y_test)

In [None]:
# Define LSTM Model in PyTorch
class LSTMModel(nn.Module):
    """LSTM followed by a linear layer applied to the last time step's output."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=1):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear head's output for the final time step of ``x``."""
        sequence_output, _state = self.lstm(x)
        # Only the output at the last sequence position feeds the classifier.
        final_step = sequence_output[:, -1, :]
        return self.fc(final_step)

# Initialize Model
# X_train is (samples, time steps, channels) -- see the printed shape
# (126, 500, 3). LSTMModel builds its LSTM with batch_first=True, so the LSTM
# input size is the per-time-step feature count on the last axis, not the
# number of time steps on axis 1.
input_dim = X_train.shape[-1]
# NOTE: y_train holds labels 1..6; shift them to 0..5 before using a loss
# that expects zero-based class indices (e.g. nn.CrossEntropyLoss).
model = LSTMModel(input_dim, hidden_dim=64, output_dim=len(np.unique(y_train)))
print(model)

In [None]:
# Define 1D CNN Model in PyTorch
class CNNModel(nn.Module):
    """One Conv1d + max-pool stage followed by a linear classifier.

    Args:
        input_channels: Number of channels per time step (Conv1d in_channels).
        num_classes: Number of output classes.
        seq_len: Length of the input sequences. The convolution preserves
            length (kernel 3, stride 1, padding 1) and the pool halves it, so
            the flattened size is ``64 * (seq_len // 2)``. When omitted it
            falls back to ``input_channels``, which is only correct if the
            sequence length happens to equal the channel count.
    """

    def __init__(self, input_channels, num_classes, seq_len=None):
        super().__init__()
        if seq_len is None:
            # Backward-compatible default: the previous sizing rule.
            seq_len = input_channels
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        # Size the classifier from the pooled sequence length, not the channels.
        self.fc = nn.Linear(64 * (seq_len // 2), num_classes)

    def forward(self, x):
        """Classify ``x`` of shape (batch, input_channels, seq_len)."""
        x = self.pool(torch.relu(self.conv1(x)))
        # Flatten (batch, 64, seq_len // 2) into (batch, 64 * (seq_len // 2)).
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Initialize Model
# X_train is (samples, time steps, channels): axis 2 holds the Conv1d channels
# and axis 1 is the sequence length. Batches must be permuted to
# (batch, channels, time) before the forward pass, e.g. x.permute(0, 2, 1).
model_cnn = CNNModel(
    input_channels=X_train.shape[2],
    num_classes=len(np.unique(y_train)),
    seq_len=X_train.shape[1],
)
print(model_cnn)

## Conclusion
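- **Deep Learning models (LSTM & CNN)** learn directly from the raw sequential signal, which makes them well suited to time-series data.
- **Machine Learning models** trained on TSFEL-generated features perform well on this task.
- **LSTMs are well suited to time-series classification** such as activity recognition.
- **Random Forest is the best-performing ML model for this dataset**, judged by the accuracy scores printed in the model-training cell.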