# Cell 1 - Import & path

In [1]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

print("✓ Imports OK")
print("PyTorch version:", torch.__version__)

✓ Imports OK
PyTorch version: 2.2.2


In [2]:
DATA_PROCESSED_DIR = os.path.join("..", "data", "processed")
data_path = os.path.join(DATA_PROCESSED_DIR, "spotify_dataset_clustered.csv")

df = pd.read_csv(data_path)
print("Dataset loaded:", df.shape)
df.head()

Dataset loaded: (169909, 18)


Unnamed: 0,track_id,track_name,artist_name,popularity,year,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,duration_ms,pca_x,pca_y,cluster
0,6KbQ3uYMLKb5jDxLF7wYDD,Singende Bataillone 1. Teil,['Carl Woitschach'],0,1928,0.995,0.708,0.195,0.563,0.151,-12.428,0.0506,118.469,0.779,158648,-1.010631,1.593194,7
1,6KuQTIu1KoTTkLXKrwlLPV,"Fantasiestücke, Op. 111: Più tosto lento","['Robert Schumann', 'Vladimir Horowitz']",0,1928,0.994,0.379,0.0135,0.901,0.0763,-28.454,0.0462,83.972,0.0767,282133,-4.751081,-0.113671,0
2,6L63VW0PibdM1HDSBoqnoM,Chapter 1.18 - Zamek kaniowski,['Seweryn Goszczyński'],0,1928,0.604,0.749,0.22,0.0,0.119,-19.924,0.929,107.177,0.88,104300,-0.184709,4.573615,7
3,6M94FkXd15sOAOQYRnWPN8,Bebamos Juntos - Instrumental (Remasterizado),['Francisco Canaro'],0,1928,0.995,0.781,0.13,0.887,0.111,-14.734,0.0926,108.003,0.72,180760,-1.671672,1.857104,5
4,6N6tiFZ9vLTSOIxkj8qKrd,"Polonaise-Fantaisie in A-Flat Major, Op. 61","['Frédéric Chopin', 'Vladimir Horowitz']",1,1928,0.99,0.21,0.204,0.908,0.098,-16.829,0.0424,62.149,0.0693,687733,-4.018761,-2.63007,0


# Cell 2 - Selecting Audio features from the dataset

In [3]:
feature_cols = [
    "acousticness", "danceability", "energy", "instrumentalness",
    "liveness", "loudness", "speechiness", "tempo", "valence",
    "duration_ms"
]

target_col = "cluster"

# Drop righe con NaN in feature o target
df_model = df.dropna(subset=feature_cols + [target_col]).copy()
print("Dataset after dropna:", df_model.shape)

X = df_model[feature_cols].values
y = df_model[target_col].astype(int).values

num_classes = len(np.unique(y))
print("Num classes:", num_classes)


Dataset after dropna: (169909, 18)
Num classes: 8


# Cell 3 - Train/val/test split + scaler

In [4]:
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_test_scaled  = scaler.transform(X_test)


Train: (135927, 10) Val: (16991, 10) Test: (16991, 10)


# Cell 4 - Dataset & DataLoader PyTorch

In [5]:
class MusicDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)  # class indices

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


train_ds = MusicDataset(X_train_scaled, y_train)
val_ds   = MusicDataset(X_val_scaled, y_val)
test_ds  = MusicDataset(X_test_scaled, y_test)

BATCH_SIZE = 256

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

len(train_ds), len(val_ds), len(test_ds)


(135927, 16991, 16991)

# Cell 5 - Definition of the MLP model

In [6]:
input_dim = X_train_scaled.shape[1]

class MusicMLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        return self.net(x)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MusicMLP(input_dim=input_dim, num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

print(model)
print("Device:", device)


MusicMLP(
  (net): Sequential(
    (0): Linear(in_features=10, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=64, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=64, out_features=8, bias=True)
  )
)
Device: cpu


# Cell 6 - train/val per epoch

In [7]:
def run_epoch(loader, model, criterion, optimizer=None):
    if optimizer is None:
        model.eval()
    else:
        model.train()

    total_loss = 0.0
    total = 0
    correct = 0

    for X_batch, y_batch in loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        logits = model(X_batch)
        loss = criterion(logits, y_batch)

        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        total_loss += loss.item() * y_batch.size(0)
        total += y_batch.size(0)

        preds = torch.argmax(logits, dim=1)
        correct += (preds == y_batch).sum().item()

    avg_loss = total_loss / total
    acc = correct / total
    return avg_loss, acc


In [8]:
EPOCHS = 20

best_val_acc = 0.0
best_state = None

for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = run_epoch(train_loader, model, criterion, optimizer)
    val_loss, val_acc = run_epoch(val_loader, model, criterion, optimizer=None)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = model.state_dict()

    print(
        f"Epoch {epoch}/{EPOCHS} | "
        f"train loss: {train_loss:.4f}, acc: {train_acc:.3f} | "
        f"val loss: {val_loss:.4f}, acc: {val_acc:.3f}"
    )

# carico i pesi migliori
if best_state is not None:
    model.load_state_dict(best_state)
    print(f"\nLoaded best model (val acc = {best_val_acc:.3f})")


Epoch 1/20 | train loss: 0.5507, acc: 0.806 | val loss: 0.1672, acc: 0.954
Epoch 2/20 | train loss: 0.2319, acc: 0.907 | val loss: 0.1172, acc: 0.969
Epoch 3/20 | train loss: 0.1823, acc: 0.927 | val loss: 0.0976, acc: 0.973
Epoch 4/20 | train loss: 0.1536, acc: 0.938 | val loss: 0.0826, acc: 0.976
Epoch 5/20 | train loss: 0.1360, acc: 0.946 | val loss: 0.0715, acc: 0.980
Epoch 6/20 | train loss: 0.1232, acc: 0.950 | val loss: 0.0630, acc: 0.985
Epoch 7/20 | train loss: 0.1137, acc: 0.954 | val loss: 0.0594, acc: 0.984
Epoch 8/20 | train loss: 0.1062, acc: 0.957 | val loss: 0.0572, acc: 0.983
Epoch 9/20 | train loss: 0.0993, acc: 0.960 | val loss: 0.0500, acc: 0.988
Epoch 10/20 | train loss: 0.0968, acc: 0.960 | val loss: 0.0514, acc: 0.984
Epoch 11/20 | train loss: 0.0904, acc: 0.963 | val loss: 0.0477, acc: 0.987
Epoch 12/20 | train loss: 0.0890, acc: 0.964 | val loss: 0.0481, acc: 0.985
Epoch 13/20 | train loss: 0.0837, acc: 0.966 | val loss: 0.0451, acc: 0.985
Epoch 14/20 | train l

# Cell 7 - Validation on the test set

In [9]:
test_loss, test_acc = run_epoch(test_loader, model, criterion, optimizer=None)
print(f"Test loss: {test_loss:.4f}, acc: {test_acc:.3f}")


Test loss: 0.0390, acc: 0.986


# Cell 8 - Saving Model & Scaler

In [10]:
MODEL_DIR = os.path.join("..", "models")
os.makedirs(MODEL_DIR, exist_ok=True)

model_path = os.path.join(MODEL_DIR, "music_mood_mlp.pt")
torch.save(model.state_dict(), model_path)

print("Model saved to:", model_path)


Model saved to: ../models/music_mood_mlp.pt


In [11]:
import joblib

scaler_path = os.path.join(MODEL_DIR, "scaler_audio_features.pkl")
joblib.dump(scaler, scaler_path)

print("Scaler saved to:", scaler_path)


Scaler saved to: ../models/scaler_audio_features.pkl
