In [1]:
import os
import json
import torch
import torch.optim as optim
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [2]:
class SensorDataset(Dataset):
    """Dataset of per-sample CSV files paired with labels from one JSON file.

    Only CSV files whose file name appears as a key in the label JSON are
    used. Each item is a ``(features, label)`` pair of ``float32`` tensors.
    """

    def __init__(self, csv_dir, label_file):
        """Index the labelled files found in ``csv_dir``.

        Args:
            csv_dir: Directory containing one CSV file per sample.
            label_file: Path to a JSON file mapping CSV file names to labels.
        """
        self.csv_dir = csv_dir
        with open(label_file, 'r') as f:
            self.labels_dict = json.load(f)
        # os.listdir yields entries in arbitrary, platform-dependent order;
        # sorting keeps the index -> file mapping stable across runs.
        self.file_list = sorted(
            name for name in os.listdir(csv_dir) if name in self.labels_dict
        )

    def __len__(self):
        """Return the number of labelled CSV files."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            A ``(features, label)`` tuple. ``features`` holds every CSV column
            except the first, as a ``float32`` tensor with one row per CSV
            row. ``label`` is the JSON value for this file as a ``float32``
            tensor.
        """
        file_name = self.file_list[idx]
        file_path = os.path.join(self.csv_dir, file_name)

        df = pd.read_csv(file_path)

        # The first column is dropped.
        # NOTE(review): presumably an index/timestamp column — confirm.
        features = torch.tensor(df.iloc[:, 1:].to_numpy(), dtype=torch.float32)

        label = torch.tensor(self.labels_dict[file_name], dtype=torch.float32)

        return features, label

def create_dataloader(csv_dir, label_file, batch_size=4):
    """
    Build a shuffling DataLoader over a SensorDataset.

    The dataset is constructed from ``csv_dir`` (a folder of CSV feature
    files) and ``label_file`` (a single JSON file of labels); each batch
    holds up to ``batch_size`` samples.
    """
    return DataLoader(
        SensorDataset(csv_dir=csv_dir, label_file=label_file),
        batch_size=batch_size,
        shuffle=True,
    )

In [None]:
# TODO: experiment with a less complex model, say CNN or 2-3 layer MLP
class LSTMClassifier(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of output units of the final linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the linear layer's output for the final time step of ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # nn.LSTM initialises (h_0, c_0) to zeros when no state is passed, and
        # does so on the input's device and dtype. This avoids building the
        # zeros on CPU and copying them over, and avoids the default-dtype
        # state that the explicit torch.zeros(...) call produced.
        out, _ = self.lstm(x)
        # Keep only the last time step's hidden output for classification.
        return self.fc(out[:, -1, :])

In [None]:
### TRAINING MODEL

# Hyperparameters collected in one place so they are easy to find and tune.
SEED = 42
BATCH_SIZE = 3
NUM_EPOCHS = 5
LEARNING_RATE = 0.001

# Seed before building the model and the dataloader: weight initialisation and
# batch shuffling both draw from torch's global RNG, so without a seed the
# reported losses change on every run.
torch.manual_seed(SEED)

dataloader = create_dataloader("data/dummy/dataset", "data/dummy/labels.json", batch_size=BATCH_SIZE)
if len(dataloader) == 0:
    # Fail early with a clear message instead of a ZeroDivisionError when the
    # average loss is computed after the first epoch.
    raise ValueError(
        "No training batches: no file in 'data/dummy/dataset' matches a key in 'data/dummy/labels.json'"
    )

# NOTE(review): input_size=8 presumably matches the number of feature columns
# per CSV row and output_size=8 the number of classes — confirm against the data.
model = LSTMClassifier(input_size=8, hidden_size=64, num_layers=2, output_size=8)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Basic training loop
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0

    for batch_features, batch_labels in dataloader:
        optimizer.zero_grad()

        outputs = model(batch_features)

        # The dataset yields labels as float32 tensors; cast them to int64
        # class indices before passing them to CrossEntropyLoss.
        loss = criterion(outputs, batch_labels.long())

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1} - Average Loss: {avg_loss:.4f}")

Epoch 1 - Average Loss: 2.1560
Epoch 2 - Average Loss: 2.0836
Epoch 3 - Average Loss: 2.0590
Epoch 4 - Average Loss: 2.0302
Epoch 5 - Average Loss: 1.9881


In [None]:
### PREDICTION

# TODO: create a function that takes an input sample and returns the model's prediction