In [1]:
import os
import json
import torch
import torch.optim as optim
import torch.nn as nn
import pandas as pd
import numpy as np
import optuna
from torch.utils.data import Dataset, DataLoader
from model_definition import LSTMClassifier, CNN1DClassifier, MLPClassifier                 # Load models
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [2]:
### DATA INGESTION FROM CSV

class SensorDataset(Dataset):
    """Map-style dataset serving one labelled CSV recording per index.

    Args:
        csv_dir: Directory that contains the CSV recordings.
        labels_dict: Mapping of CSV file name -> class label. Files in
            ``csv_dir`` that have no entry here are ignored.
        statistical_processing: When True, every sample is widened with the
            per-column mean and standard deviation (see ``__getitem__``).
    """

    def __init__(self, csv_dir, labels_dict, statistical_processing=False):
        self.csv_dir = csv_dir
        self.labels_dict = labels_dict
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable between runs and machines.
        self.file_list = sorted(f for f in os.listdir(csv_dir) if f in self.labels_dict)
        self.statistical_processing = statistical_processing

    def __len__(self):
        """Return the number of labelled CSV files found in ``csv_dir``."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load the ``idx``-th CSV and return ``(features, label)`` tensors.

        The first CSV column is skipped (presumably a timestamp/index column
        -- confirm against the data). ``features`` is a float32 tensor with one
        row per CSV row; with ``statistical_processing`` its width is tripled:
        ``[raw columns | column means | column stds]``.
        """
        file_name = self.file_list[idx]
        file_path = os.path.join(self.csv_dir, file_name)

        df = pd.read_csv(file_path)

        features = df.iloc[:, 1:].values
        if self.statistical_processing:
            # Repeat the per-column statistics once per row of *this* file.
            # The row count used to be hard-coded to 25, which made hstack
            # fail for recordings of any other length.
            n_rows = features.shape[0]
            mean_cols = np.tile(np.mean(features, axis=0), (n_rows, 1))
            std_cols = np.tile(np.std(features, axis=0), (n_rows, 1))
            features = np.hstack([features, mean_cols, std_cols])

        features = torch.tensor(features, dtype=torch.float32)
        # Kept as float32 for backward compatibility; train_model casts the
        # labels to long before computing the loss.
        label = torch.tensor(self.labels_dict[file_name], dtype=torch.float32)

        return features, label

def create_dataloader(dataset, csv_dir, labels_dict, statistical_processing=False, batch_size=4):
    """Instantiate ``dataset`` and wrap it in a shuffling ``DataLoader``.

    Args:
        dataset: Callable (typically a Dataset class) invoked as
            ``dataset(csv_dir, labels_dict, statistical_processing)``.
        csv_dir: Forwarded to ``dataset``.
        labels_dict: Forwarded to ``dataset``.
        statistical_processing: Forwarded to ``dataset``.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over the constructed dataset with ``shuffle=True``.
    """
    samples = dataset(csv_dir, labels_dict, statistical_processing)
    loader = DataLoader(samples, shuffle=True, batch_size=batch_size)
    return loader

In [3]:
### TRAINING

def train_model(dataloader, model, criterion, optimiser, epochs=10):
    """Train ``model`` in place and print the mean batch loss of every epoch.

    Args:
        dataloader: Iterable yielding ``(batch_features, batch_labels)`` pairs.
            It is iterated once per epoch and does not need to support ``len``.
        model: Module to train. It is moved to CUDA when available, else CPU.
        criterion: Loss called as ``criterion(outputs, labels)``; labels are
            cast to ``long`` before the call.
        optimiser: Optimiser already bound to ``model``'s parameters.
        epochs: Number of passes over ``dataloader``.

    Returns:
        The mean batch loss of the last epoch as a float (e.g. as an Optuna
        objective value), or ``None`` when ``epochs`` is 0.

    Raises:
        ValueError: If an epoch yields no batches (previously surfaced as a
            ZeroDivisionError while averaging).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Loop-invariant: batches for this model class get axes 1 and 2 swapped
    # before the forward pass.
    swap_axes = isinstance(model, CNN1DClassifier)

    final_loss = None
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        n_batches = 0

        for batch_features, batch_labels in dataloader:
            if swap_axes:
                batch_features = batch_features.transpose(1, 2)
            batch_features = batch_features.to(device)
            batch_labels = batch_labels.to(device).long()

            optimiser.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimiser.step()

            total_loss += loss.item()
            n_batches += 1

        if n_batches == 0:
            raise ValueError("dataloader yielded no batches; cannot compute an average loss")

        # Counting batches while iterating equals len(dataloader) for a
        # DataLoader, and also works for iterables without __len__.
        final_loss = total_loss / n_batches
        print(f"Epoch {epoch+1}/{epochs} - Loss: {final_loss:.4f}")

    return final_loss

# Seed PyTorch (weight initialisation, DataLoader shuffling) and NumPy so that
# "Restart Kernel -> Run All" reproduces the same training run.
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)

statistical_processing = False
data_folder_path = "data/dummy/dataset"

# Hyperparameters. The DataLoader used to be built with a literal 5 while
# batch_size was set to 16 afterwards and never used; the variable now holds
# the value that is actually applied.
lr = 0.001
batch_size = 5
epochs = 20

# labels.json sits inside the dataset folder, so derive its path from it.
with open(os.path.join(data_folder_path, "labels.json"), "r") as f:
    labels_dict = json.load(f)
dataloader = create_dataloader(SensorDataset, data_folder_path, labels_dict, statistical_processing, batch_size=batch_size)

# Statistical processing triples the number of feature columns (raw, mean, std).
# NOTE(review): assumes CNN1DClassifier's input_size is the feature/channel
# count, as in the LSTM alternative below -- confirm in model_definition.
input_size = 8 * 3 if statistical_processing else 8

# model = LSTMClassifier(input_size=8*3 if statistical_processing else 8, hidden_size=64, num_layers=2, num_classes=8)
model = CNN1DClassifier(input_size=input_size, num_classes=8)
# model = MLPClassifier(input_size=8, num_classes=8)

criterion = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(), lr)

train_model(dataloader, model, criterion, optimiser, epochs)

Epoch 1/20 - Loss: 42.7449
Epoch 2/20 - Loss: 25.5671
Epoch 3/20 - Loss: 15.2530
Epoch 4/20 - Loss: 11.7506
Epoch 5/20 - Loss: 8.2157
Epoch 6/20 - Loss: 7.9120
Epoch 7/20 - Loss: 8.8302
Epoch 8/20 - Loss: 8.3544
Epoch 9/20 - Loss: 6.8826
Epoch 10/20 - Loss: 4.8389
Epoch 11/20 - Loss: 4.9417
Epoch 12/20 - Loss: 3.6754
Epoch 13/20 - Loss: 1.7469
Epoch 14/20 - Loss: 3.0781
Epoch 15/20 - Loss: 3.2896
Epoch 16/20 - Loss: 2.6153
Epoch 17/20 - Loss: 2.7862
Epoch 18/20 - Loss: 3.1733
Epoch 19/20 - Loss: 3.3038
Epoch 20/20 - Loss: 3.2444


In [4]:
### PREDICTION AND EVALUATION

def get_features_from_csv(file_path, statistical_processing=False):
    """Read one CSV recording and return its feature matrix as a NumPy array.

    The first CSV column is skipped (presumably a timestamp/index column --
    confirm against the data); every remaining column is a feature.

    Args:
        file_path: Path of the CSV file to read.
        statistical_processing: When True, append each column's mean and
            standard deviation, repeated on every row, so the result is
            ``[raw columns | column means | column stds]``.

    Returns:
        Array with one row per CSV row and ``n_features`` columns, or
        ``3 * n_features`` columns with ``statistical_processing``.
    """
    df = pd.read_csv(file_path)
    features = df.iloc[:, 1:].values

    if statistical_processing:
        # Tile the statistics to this file's own row count. It used to be
        # hard-coded to 25, which broke hstack for any other length.
        n_rows = features.shape[0]
        mean_cols = np.tile(np.mean(features, axis=0), (n_rows, 1))
        std_cols = np.tile(np.std(features, axis=0), (n_rows, 1))
        features = np.hstack([features, mean_cols, std_cols])

    return features

def predict_csv(model, features):
    """Classify a single recording and report the winning class probability.

    Args:
        model: Trained classifier returning logits of shape
            ``(1, num_classes)`` for a batch of one. It is switched to eval
            mode as a side effect.
        features: 2-D NumPy array for one recording, e.g. the output of
            ``get_features_from_csv``.

    Returns:
        ``(pred_class, confidence)``: the arg-max class index as an int and
        its softmax probability as a float.
    """
    model.eval()
    # Put the input on the device the model's weights already live on. The
    # previous "cuda if available" guess raised a device-mismatch error for a
    # CPU-resident model on a CUDA machine.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        # No parameters means there is nothing to match against.
        device = torch.device("cpu")

    if isinstance(model, CNN1DClassifier):
        # Mirror train_model, which swaps the last two axes of each batch
        # for this model class before the forward pass.
        features = features.transpose()
    input_tensor = torch.tensor(features, dtype=torch.float32).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(input_tensor)
        probs = torch.softmax(logits, dim=1)

        pred_class = torch.argmax(probs, dim=1).item()
        # Probability of the predicted class; for a batch of one this is the
        # maximum over the whole tensor.
        confidence = probs[0, pred_class].item()

    return pred_class, confidence

def evaluate_folder(model, folder_path, labels_dict, statistical_processing=False, num_classes=8):
    """Predict every labelled CSV in a folder and summarise the results.

    Args:
        model: Trained classifier passed to ``predict_csv``.
        folder_path: Directory containing the CSV files named in ``labels_dict``.
        labels_dict: Mapping of CSV file name -> true class label. Every key
            is read from ``folder_path``.
        statistical_processing: Forwarded to ``get_features_from_csv``; use
            the same setting the model was trained with.
        num_classes: Number of classes; labels ``0 .. num_classes - 1`` fix
            the rows/columns of the confusion matrix and the report entries.
            Defaults to 8, the value that used to be hard-coded.

    Returns:
        ``(accuracy, conf_matrix, report)`` from scikit-learn's
        ``accuracy_score``, ``confusion_matrix`` and ``classification_report``.
    """
    class_ids = list(range(num_classes))
    y_true = []
    y_pred = []

    for file_name, label in labels_dict.items():
        file_path = os.path.join(folder_path, file_name)
        features = get_features_from_csv(file_path, statistical_processing)

        # The confidence value is not needed for these metrics.
        pred, _ = predict_csv(model, features)
        y_true.append(label)
        y_pred.append(pred)

    accuracy = accuracy_score(y_true, y_pred)
    conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    report = classification_report(y_true, y_pred, labels=class_ids, zero_division=0)

    return accuracy, conf_matrix, report

# Score the model on every labelled CSV in the dataset folder. This is the same
# folder and label file the training DataLoader was built from, so the printed
# figure is accuracy on the training data.
acc, cm, report = evaluate_folder(
    model=model,
    folder_path=data_folder_path,
    labels_dict=labels_dict,
    statistical_processing=statistical_processing,
)
print(acc)

0.4


In [5]:
### EXPORT MODEL

def export_model(model, path="cnn_weights.pth"):
    """Save ``model``'s weights (its ``state_dict``) to ``path``.

    Args:
        model: Module whose parameters are saved. It is switched to eval mode
            as a side effect.
        path: Destination file. Defaults to ``"cnn_weights.pth"``, the name
            that used to be hard-coded, so other architectures can be exported
            without overwriting the CNN weights.
    """
    model.eval()
    torch.save(model.state_dict(), path)

# Persist the trained weights; export_model writes the state_dict to
# "cnn_weights.pth", a relative path resolved against the working directory.
export_model(model)