In [32]:
import pandas as pd
import numpy as np

from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import MultiStepLR 
from torch.utils.tensorboard import SummaryWriter

import matplotlib.pyplot as plt
import seaborn as sns

import data

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Load the pre-split train/test features and labels from the project-local `data` module.
# NOTE(review): the returned objects are presumably pandas DataFrames/Series (later cells
# read `.values` and `.shape[1]` from them) — confirm against data.get_1060_normalized.
X_train, y_train, X_test, y_test = data.get_1060_normalized()

In [14]:
# Convert the feature tables to float32 torch tensors.
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

# Convert y from [1, 2, 3, 4] → [0, 1, 2, 3] (zero-based class indices).
# reshape(-1) always yields a 1-D label vector. The previous .squeeze() would
# collapse a single-sample split into a 0-d tensor, which TensorDataset cannot
# size-check against the feature tensor.
y_train_tensor = torch.tensor(y_train.values - 1, dtype=torch.long).reshape(-1)
y_test_tensor = torch.tensor(y_test.values - 1, dtype=torch.long).reshape(-1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# NOTE(review): the batch size is the literal 32 here, while a later cell defines
# BATCH_SIZE = 10 that nothing visible reads — confirm which value is intended.
# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

def print_stat(x, name):
    """Print a short summary of a tensor.

    Always prints a header line, the shape, and the min/max. Mean and standard
    deviation are printed for any floating-point dtype. When ``name`` starts
    with ``"y_"`` (case-insensitive) the distinct values and their counts are
    printed as well.

    Args:
        x: The ``torch.Tensor`` to summarise.
        name: Label used in the header line; a ``"y_"`` prefix switches on the
            unique-value report.

    Returns:
        None. All results are written to stdout.
    """
    print(f"======= {name} ======")
    print(x.shape)

    # min()/max() raise on a tensor with no elements, so stop after the shape.
    if x.numel() == 0:
        print("(empty tensor)")
        return

    print(f"{x.min()=}")
    print(f"{x.max()=}")

    # Any float dtype supports mean/std, not only float32.
    if x.is_floating_point():
        print(f"{x.mean()=}")
        print(f"{x.std()=}")

    if name.lower().startswith("y_"):
        unique_values, counts = torch.unique(x, return_counts=True)
        print("Unique values:", unique_values)
        print("Counts:", counts)

# Summarise each split in turn: features first, then labels.
for _split_name, _split_tensor in (
    ("X_Train", X_train_tensor),
    ("X_Test", X_test_tensor),
    ("Y_Train", y_train_tensor),
    ("Y_Test", y_test_tensor),
):
    print_stat(_split_tensor, _split_name)


torch.Size([1060, 2550])
x.min()=tensor(-2.8571)
x.max()=tensor(91.7090)
x.mean()=tensor(0.0722)
x.std()=tensor(0.9352)
torch.Size([140, 2550])
x.min()=tensor(-2.6429)
x.max()=tensor(63.7779)
x.mean()=tensor(-0.0114)
x.std()=tensor(0.8815)
torch.Size([1060])
x.min()=tensor(0)
x.max()=tensor(3)
Unique values: tensor([0, 1, 2, 3])
Counts: tensor([265, 265, 265, 265])
torch.Size([140])
x.min()=tensor(0)
x.max()=tensor(3)
Unique values: tensor([0, 1, 2, 3])
Counts: tensor([53, 14, 66,  7])


In [35]:
# Hyperparameters and derived sizes for the classifier.
NUM_CLASSES = 4  # number of target classes; NOTE(review): not referenced elsewhere in the visible cells
BATCH_SIZE = 10  # NOTE(review): not referenced elsewhere in the visible cells — the DataLoaders use batch_size=32; confirm intent
EPOCHS = 2000  # number of passes over the training loader in the training loop
LEARNING_RATE = 0.001  # initial learning rate handed to the Adam optimizer
features_count = X_train.shape[1]  # second dimension of X_train, used as the model's input width
features_count  # bare expression so the notebook displays the value

2550

In [41]:
# Define the model
class CustomNN(nn.Module):
    """Fully connected classifier: Linear/ReLU hidden layers plus a linear output.

    With the default arguments the network is
    ``Linear(in, 10) -> ReLU -> Linear(10, 50) -> ReLU -> Linear(50, 4)``.
    The output is raw logits (no softmax), one column per class.

    Args:
        in_features: Width of each input row. When ``None`` (the default) the
            notebook-level ``features_count`` variable is read at construction
            time, so the existing ``CustomNN()`` call keeps working.
        num_classes: Number of output logits.
        hidden_sizes: Widths of the hidden layers, in order; each one is
            followed by a ReLU.
    """

    def __init__(self, in_features=None, num_classes=4, hidden_sizes=(10, 50)):
        super().__init__()
        if in_features is None:
            # Backward-compatible fallback to the notebook global.
            in_features = features_count

        layers = []
        width = in_features
        for hidden in hidden_sizes:
            layers.append(nn.Linear(width, hidden))
            layers.append(nn.ReLU())
            width = hidden
        layers.append(nn.Linear(width, num_classes))

        # Keep the attribute name `model` so state_dict keys stay `model.<idx>.*`.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch ``x`` of shape (batch, in_features)."""
        return self.model(x)

# Seed torch's global RNG before any weights are created so that weight
# initialisation (and any later shuffling drawn from the same generator)
# is repeatable from a fresh kernel.
SEED = 0
torch.manual_seed(SEED)

model = CustomNN()
optimizer = torch.optim.Adam(model.parameters(), lr= LEARNING_RATE)
# Multiply the learning rate by 0.1 each time the scheduler's step count reaches a milestone.
scheduler = MultiStepLR(optimizer, milestones=[100, 400, 800, 1500], gamma=0.1)
# CrossEntropyLoss takes raw logits and integer class indices.
loss_function = nn.CrossEntropyLoss()

print(f"{optimizer = }")
print(f"{loss_function = }")

optimizer = Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.001
    lr: 0.001
    maximize: False
    weight_decay: 0
)
loss_function = CrossEntropyLoss()


In [36]:
def evaluation(model, loader):
    """Return the classification accuracy of ``model`` over all batches in ``loader``.

    The model is switched to evaluation mode and run without gradient tracking.
    Its previous train/eval mode is restored before returning, so calling this
    in the middle of training does not leave the model in eval mode.

    Args:
        model: An ``nn.Module`` mapping a feature batch to per-class scores of
            shape (batch, num_classes).
        loader: An iterable of ``(X_batch, y_batch)`` pairs, where ``y_batch``
            holds integer class indices.

    Returns:
        The fraction of samples whose highest-scoring class equals the label,
        as a float in [0, 1]; ``nan`` when the loader yields no samples.
    """
    was_training = model.training
    model.eval()  # set model to evaluation mode

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for X_batch, y_batch in loader:
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                # Compare as tensors and pull out a Python int, so this works
                # on any device (no .numpy() round trip).
                targets = y_batch.reshape(predicted.shape)
                correct += (predicted == targets).sum().item()
                total += targets.numel()
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    if total == 0:
        return float("nan")
    return correct / total

In [42]:
# Train the model, logging per-epoch scalars to TensorBoard under ./logs.
tensorboard_writer = SummaryWriter("logs")
try:
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = loss_function(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Read the learning rate BEFORE stepping the scheduler: this is the rate
        # the epoch was actually trained with. Reading it after the step logs
        # the next epoch's rate at every milestone.
        epoch_lr = scheduler.get_last_lr()[0]
        scheduler.step()

        # 'loss' is the SUM of the per-batch mean losses, so its scale depends
        # on the number of batches per epoch.
        info = {
            'loss': total_loss,
            'lr': epoch_lr,
        }

        # Evaluate every 10th epoch, and always on the final epoch so the
        # finished model's accuracy is reported.
        if epoch % 10 == 0 or epoch == EPOCHS - 1:
            train_acc = evaluation(model, train_loader)
            test_acc = evaluation(model, test_loader)
            info["train_accuracy"] = train_acc
            info["test_accuracy"] = test_acc
            print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss:.4f}, Train_Acc = {train_acc:0.4f}, Test_Acc = {test_acc:0.4f}")

        for tag, value in info.items():
            tensorboard_writer.add_scalar("scalars/" + tag, value, epoch+1)
finally:
    # Flush and release the event file even if training is interrupted.
    tensorboard_writer.close()

print("Training finished!")


Epoch 1/2000, Loss: 44.2438, Train_Acc = 0.5462, Test_Acc = 0.3071
Epoch 11/2000, Loss: 7.4880, Train_Acc = 0.9292, Test_Acc = 0.6786
Epoch 21/2000, Loss: 2.9444, Train_Acc = 0.9811, Test_Acc = 0.8071
Epoch 31/2000, Loss: 4.2385, Train_Acc = 0.9708, Test_Acc = 0.7500
Epoch 41/2000, Loss: 3.2246, Train_Acc = 0.9811, Test_Acc = 0.8214
Epoch 51/2000, Loss: 5.4862, Train_Acc = 0.9811, Test_Acc = 0.8071
Epoch 61/2000, Loss: 2.1015, Train_Acc = 0.9811, Test_Acc = 0.7929
Epoch 71/2000, Loss: 2.0655, Train_Acc = 0.9811, Test_Acc = 0.7929
Epoch 81/2000, Loss: 2.0177, Train_Acc = 0.9811, Test_Acc = 0.8071
Epoch 91/2000, Loss: 3.1595, Train_Acc = 0.9585, Test_Acc = 0.7786
Epoch 101/2000, Loss: 1.9894, Train_Acc = 0.9811, Test_Acc = 0.7857
Epoch 111/2000, Loss: 1.7584, Train_Acc = 0.9811, Test_Acc = 0.7857
Epoch 121/2000, Loss: 1.7258, Train_Acc = 0.9811, Test_Acc = 0.7929
Epoch 131/2000, Loss: 1.7221, Train_Acc = 0.9811, Test_Acc = 0.7929
Epoch 141/2000, Loss: 1.6986, Train_Acc = 0.9811, Test_Acc