# Cell 1 - Imports and variables

In [1]:
import os
import joblib
import torch
import torch.nn as nn
import pandas as pd

In [2]:
# Folder that holds the serialized artifacts, relative to the notebook's working directory.
MODEL_DIR = os.path.join("..", "models")

# Both artifact paths are derived from MODEL_DIR so the folder is configured in one place.
scaler_path, model_path = (
    os.path.join(MODEL_DIR, artifact)
    for artifact in ("scaler_audio_features.pkl", "music_mood_mlp.pt")
)

In [3]:
class MusicMLP(nn.Module):
    """Feed-forward classifier with two 64-unit ReLU hidden layers and dropout.

    Maps a batch of ``input_dim`` features to ``num_classes`` raw scores;
    no softmax is applied inside the module.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        hidden_units = 64
        drop_prob = 0.2
        # The position of each layer fixes its ``net.<index>`` parameter name,
        # so the order must stay as-is for saved state dicts to load.
        layers = [
            nn.Linear(input_dim, hidden_units),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Dropout(drop_prob),
            nn.Linear(hidden_units, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return its output."""
        scores = self.net(x)
        return scores

# Cell 2 - Setting up and running the model

In [4]:
# Load the feature scaler persisted at scaler_path; it is applied later via scaler.transform.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code —
# only load artifacts that come from a trusted source.
scaler = joblib.load(scaler_path)
print("Scaler caricato.")

Scaler caricato.


In [5]:
# Audio features fed to the scaler and the network; the list order defines the input columns.
feature_cols = [
    "acousticness",
    "danceability",
    "energy",
    "instrumentalness",
    "liveness",
    "loudness",
    "speechiness",
    "tempo",
    "valence",
    "duration_ms",
]

num_classes = 8  # size of the classifier's output layer
input_dim = len(feature_cols)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Rebuild the architecture, move it to the chosen device, then restore the saved weights.
model = MusicMLP(input_dim=input_dim, num_classes=num_classes)
model = model.to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()  # evaluation mode: the Dropout layers become no-ops

print("Modello caricato su:", device)

Modello caricato su: cpu


In [6]:
# Folder with the preprocessed data, relative to the notebook's working directory.
DATA_PROCESSED_DIR = os.path.join("..", "data", "processed")
data_path = os.path.join(DATA_PROCESSED_DIR, "spotify_dataset_clustered.csv")

# Read the track table; later cells select the feature_cols columns from it.
df = pd.read_csv(data_path)
print("Dataset loaded:", df.shape)
# Last expression of the cell, so the notebook renders the first five rows.
df.head()

Dataset loaded: (169909, 18)


Unnamed: 0,track_id,track_name,artist_name,popularity,year,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,duration_ms,pca_x,pca_y,cluster
0,6KbQ3uYMLKb5jDxLF7wYDD,Singende Bataillone 1. Teil,['Carl Woitschach'],0,1928,0.995,0.708,0.195,0.563,0.151,-12.428,0.0506,118.469,0.779,158648,-1.010631,1.593194,7
1,6KuQTIu1KoTTkLXKrwlLPV,"Fantasiestücke, Op. 111: Più tosto lento","['Robert Schumann', 'Vladimir Horowitz']",0,1928,0.994,0.379,0.0135,0.901,0.0763,-28.454,0.0462,83.972,0.0767,282133,-4.751081,-0.113671,0
2,6L63VW0PibdM1HDSBoqnoM,Chapter 1.18 - Zamek kaniowski,['Seweryn Goszczyński'],0,1928,0.604,0.749,0.22,0.0,0.119,-19.924,0.929,107.177,0.88,104300,-0.184709,4.573615,7
3,6M94FkXd15sOAOQYRnWPN8,Bebamos Juntos - Instrumental (Remasterizado),['Francisco Canaro'],0,1928,0.995,0.781,0.13,0.887,0.111,-14.734,0.0926,108.003,0.72,180760,-1.671672,1.857104,5
4,6N6tiFZ9vLTSOIxkj8qKrd,"Polonaise-Fantaisie in A-Flat Major, Op. 61","['Frédéric Chopin', 'Vladimir Horowitz']",1,1928,0.99,0.21,0.204,0.908,0.098,-16.829,0.0424,62.149,0.0693,687733,-4.018761,-2.63007,0


In [7]:

# Keep only rows that have a value in every model feature. The .copy() yields an
# independent frame, so columns assigned to df_model later do not touch df.
df_model = df.dropna(subset=feature_cols).copy()
print("Dataset after dropna:", df_model.shape)

Dataset after dropna: (169909, 18)


In [8]:
# Feature matrix in feature_cols order, cast to float32 before scaling.
X = df_model[feature_cols].to_numpy().astype("float32")

# Apply the loaded scaler before feeding the network.
X_scaled = scaler.transform(X)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    inputs = torch.tensor(X_scaled, dtype=torch.float32, device=device)
    outputs = model(inputs)
    # Index of the highest score per row is the predicted class.
    predicted_classes = outputs.argmax(dim=1).cpu().numpy()

# Overwrite the "cluster" column with the network's prediction for every row.
df_model["cluster"] = predicted_classes

print("Clusters assigned to dataset.")

Clusters assigned to dataset.


# Cell 3 - Engineering the output

In [9]:
import numpy as np

# Drop the PCA projection columns if they exist. errors="ignore" makes the code
# match that intent and keeps the cell re-runnable: without it a second run
# raises KeyError because the columns are already gone.
df_model = df_model.drop(columns=["pca_x", "pca_y"], errors="ignore")

cluster_key = df_model["cluster"]

# Euclidean distance of every track to the mean feature vector of its cluster.
# transform("mean") broadcasts each cluster's centroid back onto its rows, so the
# distance is computed for the whole frame at once instead of cluster by cluster.
# NOTE(review): the distance uses the raw feature_cols values, so large-magnitude
# columns such as duration_ms dominate it — consider computing it on the scaled
# features instead; left unchanged here because it would alter every ranking.
cluster_centroids = df_model.groupby("cluster")[feature_cols].transform("mean")
df_model["distance_to_centroid"] = np.linalg.norm(
    df_model[feature_cols].to_numpy() - cluster_centroids.to_numpy(),
    axis=1,
)

# Min-max normalised distance within each cluster. A cluster whose distances are
# all equal (e.g. a single track) has a zero span; it gets 0.0 instead of NaN.
distance_by_cluster = df_model["distance_to_centroid"].groupby(cluster_key)
distance_min = distance_by_cluster.transform("min")
distance_span = distance_by_cluster.transform("max") - distance_min
df_model["distance_normalized"] = (
    (df_model["distance_to_centroid"] - distance_min) / distance_span
).where(distance_span > 0, 0.0)

# Per-cluster ranks: rank 1 is the track closest to the centroid / the most popular one.
df_model["rank_distance"] = (
    df_model["distance_to_centroid"].groupby(cluster_key).rank(ascending=True)
)
df_model["rank_popularity"] = (
    df_model["popularity"].groupby(cluster_key).rank(ascending=False)
)

# Blend both ranks into one score; each term is 1 - rank / (largest rank in the
# cluster), so a higher combined_rank means closer to the centroid and more popular.
weight_distance = 0.4
weight_pop = 0.6
max_rank_distance = df_model["rank_distance"].groupby(cluster_key).transform("max")
max_rank_popularity = df_model["rank_popularity"].groupby(cluster_key).transform("max")
df_model["combined_rank"] = (
    weight_distance * (1 - df_model["rank_distance"] / max_rank_distance)
    + weight_pop * (1 - df_model["rank_popularity"] / max_rank_popularity)
)

# final output
df_model.head()

Unnamed: 0,track_id,track_name,artist_name,popularity,year,acousticness,danceability,energy,instrumentalness,liveness,...,speechiness,tempo,valence,duration_ms,cluster,distance_to_centroid,distance_normalized,rank_distance,rank_popularity,combined_rank
0,6KbQ3uYMLKb5jDxLF7wYDD,Singende Bataillone 1. Teil,['Carl Woitschach'],0,1928,0.995,0.708,0.195,0.563,0.151,...,0.0506,118.469,0.779,158648,7,690.155027,0.000939,140.0,9085.5,0.39549
1,6KuQTIu1KoTTkLXKrwlLPV,"Fantasiestücke, Op. 111: Più tosto lento","['Robert Schumann', 'Vladimir Horowitz']",0,1928,0.994,0.379,0.0135,0.901,0.0763,...,0.0462,83.972,0.0767,282133,0,24695.183683,0.004843,1187.5,11308.0,0.365758
2,6L63VW0PibdM1HDSBoqnoM,Chapter 1.18 - Zamek kaniowski,['Seweryn Goszczyński'],0,1928,0.604,0.749,0.22,0.0,0.119,...,0.929,107.177,0.88,104300,7,55038.083982,0.075618,8888.0,9085.5,0.113706
3,6M94FkXd15sOAOQYRnWPN8,Bebamos Juntos - Instrumental (Remasterizado),['Francisco Canaro'],0,1928,0.995,0.781,0.13,0.887,0.111,...,0.0926,108.003,0.72,180760,5,4473.410665,0.004039,2091.0,16162.0,0.358052
4,6N6tiFZ9vLTSOIxkj8qKrd,"Polonaise-Fantaisie in A-Flat Major, Op. 61","['Frédéric Chopin', 'Vladimir Horowitz']",1,1928,0.99,0.21,0.204,0.908,0.098,...,0.0424,62.149,0.0693,687733,0,380904.821651,0.074734,13117.0,8501.5,0.170683


# Cell 4 - Playlist creation function

In [10]:
from collections import Counter
from datetime import datetime
def recommend_playlist(mood, activity, time_of_day, age, songs=None):
    """Pick a playlist from the clustered catalogue for a mood/activity/time context.

    Every known ``mood``, ``activity`` and ``time_of_day`` votes for a set of
    cluster ids. Each voted cluster contributes its best tracks by
    ``combined_rank``, in proportion to its share of the votes. For the
    "party" and "happy" moods, up to five of the most popular tracks of the
    most-voted cluster released between the listener's 18th and 35th year
    are appended.

    NOTE: a later cell of this notebook defines ``recommend_playlist`` again;
    after a full run the later definition is the one bound to this name.

    Parameters
    ----------
    mood, activity, time_of_day : str
        Context keys; values that are not in the maps below cast no votes.
    age : int or str
        Listener's age; converted with ``int()``.
    songs : pandas.DataFrame, optional
        Catalogue with the columns ``track_id``, ``cluster``, ``combined_rank``,
        ``popularity`` and ``year``. Defaults to the notebook-level ``df_model``.

    Returns
    -------
    pandas.DataFrame
        At most 20 rows of ``songs`` without repeated ``track_id``; an empty
        frame with the catalogue's columns when nothing matches.

    Raises
    ------
    ValueError
        If ``age`` cannot be converted to an integer.
    """
    if songs is None:
        songs = df_model

    # Cluster ids suggested by each context value.
    mood_map = {
        "relax": [0, 2, 4, 5],
        "happy": [1, 4, 7],
        "sad": [2],
        "workout": [3, 6],
        "focus": [0, 5],
        "party": [1, 7],
    }
    activity_map = {
        "study time": [0, 5],
        "walking": [1, 4, 7],
        "running": [6, 3],
        "relaxing": [0, 2],
        "party": [1, 7],
    }
    time_map = {
        "morning": [7, 4],
        "afternoon": [1, 4],
        "evening": [0, 2, 5],
        "night": [0, 5],
    }

    # One vote per appearance of a cluster id across the three maps.
    clusters = [
        *mood_map.get(mood, []),
        *activity_map.get(activity, []),
        *time_map.get(time_of_day, []),
    ]

    if mood == "party" and activity == "party" and time_of_day == 'night':
        # The "night" slot voted for clusters 0 and 5; for an all-party request
        # those votes are swapped for one more vote each for clusters 1 and 7.
        clusters.remove(0)
        clusters.remove(5)
        clusters += [1, 7]

    cluster_votes = Counter(clusters)
    total_votes = len(clusters)
    playlist_length = 10 if mood in ("party", "happy") else 15

    # Validated up front so a malformed age fails for every mood.
    birth_year = datetime.now().year - int(age)

    # Collect the pieces in a list and concatenate once, instead of growing a
    # DataFrame inside the loop.
    picks = []
    for cluster, votes in cluster_votes.items():
        weight = votes / total_votes
        n_songs = max(1, int(weight * playlist_length))
        picks.append(
            songs[songs["cluster"] == cluster]
            .sort_values("combined_rank", ascending=False)
            .head(n_songs)
        )

    if mood in ("party", "happy"):
        # Ties between clusters resolve to the first one that received a vote.
        top_cluster = max(cluster_votes, key=cluster_votes.get)
        in_window = (songs["year"] >= birth_year + 18) & (songs["year"] <= birth_year + 35)
        picks.append(
            songs[(songs["cluster"] == top_cluster) & in_window]
            .sort_values("popularity", ascending=False)
            .head(5)
        )

    picks = [piece for piece in picks if not piece.empty]
    if not picks:
        return songs.iloc[0:0]

    # The age-window picks can repeat tracks already chosen by rank; keep the
    # first occurrence of each track only.
    playlist = pd.concat(picks).drop_duplicates(subset="track_id")
    return playlist.head(20)


# Example request: party mood, party activity, at night; the age is given as a string and is cast with int() inside the function.
recommend_playlist('party','party','night','24')


Unnamed: 0,track_id,track_name,artist_name,popularity,year,acousticness,danceability,energy,instrumentalness,liveness,...,speechiness,tempo,valence,duration_ms,cluster,distance_to_centroid,distance_normalized,rank_distance,rank_popularity,combined_rank
87959,11VApNQCWLJdzxWrlmwzUa,Say So (feat. Nicki Minaj),"['Doja Cat', 'Nicki Minaj']",89,2020,0.119,0.856,0.641,5e-06,0.427,...,0.204,111.004,0.745,206221,1,276.841432,0.000654,113.0,22.0,0.9978
87981,7wsmIIm0xWmtP7TmACXkJn,Oprah’s Bank Account (Lil Yachty & DaBaby feat...,"['Lil Yachty', 'DaBaby', 'Drake']",83,2020,0.17,0.836,0.617,0.0,0.319,...,0.0534,138.066,0.866,206702,1,205.354543,0.000481,88.0,90.0,0.996624
116670,2lCkncy6bIB0LTMT7kvrD1,Azul,['J Balvin'],86,2020,0.0816,0.843,0.836,0.00138,0.0532,...,0.0695,94.018,0.65,205933,1,565.253728,0.001351,231.0,46.0,0.995479
87552,0KKkJNfGyhkQ5aFogxQAPU,That's What I Like,['Bruno Mars'],81,2016,0.013,0.853,0.56,0.0,0.0944,...,0.0406,134.066,0.86,206693,1,196.009769,0.000458,82.0,149.5,0.995357
126052,0PXukVbYpvz40KcEFKnIw7,Don't Rush (feat. Headie One),"['Young T & Bugsey', 'Headie One']",82,2019,0.202,0.961,0.461,0.00402,0.162,...,0.27,108.028,0.324,207640,1,1142.292349,0.002746,423.0,115.0,0.99102
144052,6tAqYm2Wcy2yrPixShJMS6,Alone,"['SadBoyProlific', 'Ivri']",69,2018,0.481,0.87,0.341,0.0027,0.329,...,0.439,119.054,0.436,160105,7,766.983352,0.001044,161.0,50.5,0.991479
144303,2HnfqILzgKt1CepTlU4oqq,Like No One Does,['Jake Scott'],69,2020,0.352,0.8,0.41,1e-06,0.107,...,0.236,104.941,0.442,158469,7,869.096881,0.001185,179.0,50.5,0.990899
86594,6RFkVsPmrM4pzlDkFswwJl,God's Gonna Cut You Down,['Johnny Cash'],64,2006,0.868,0.617,0.485,3.4e-05,0.115,...,0.115,82.228,0.845,158573,7,765.583374,0.001042,160.0,104.0,0.987978
96040,3gqgSM82j6NMAj4Jllr06T,Vete Ya,['Valentín Elizalde'],69,2003,0.504,0.811,0.323,1.6e-05,0.019,...,0.0744,88.018,0.764,157760,7,1578.223574,0.002159,317.0,50.5,0.986454
95162,373yGkNo74RZgmQgTiR8xK,It's Only A Paper Moon,"['Ella Fitzgerald', 'The Delta Rhythm Boys']",61,1994,0.944,0.761,0.103,0.0,0.127,...,0.0416,114.044,0.53,160133,7,794.934437,0.001083,167.0,166.5,0.983625


In [15]:
from collections import Counter
from datetime import datetime
import pandas as pd
import numpy as np

def recommend_playlist(mood, activity, time_of_day, age, songs=None):
    """Build a playlist of up to 20 tracks, with at most 2 tracks per artist entry.

    Every known ``mood``, ``activity`` and ``time_of_day`` votes for a set of
    cluster ids. Each voted cluster contributes a candidate pool of three
    times its vote-proportional quota, ordered by ``combined_rank``. For the
    "party" and "happy" moods, up to five of the most popular tracks of the
    most-voted cluster released while the listener was 15 to 30 years old
    (never later than the current year) join the pool. The pool is then
    de-duplicated by ``track_id``, ordered by ``combined_rank`` and walked
    from the top, skipping tracks whose ``artist_name`` value has already been
    taken twice.

    Parameters
    ----------
    mood, activity, time_of_day : str
        Context keys; values that are not in the maps below cast no votes.
    age : int or str
        Listener's age; converted with ``int()``.
    songs : pandas.DataFrame, optional
        Catalogue with the columns ``track_id``, ``artist_name``, ``cluster``,
        ``combined_rank``, ``popularity`` and ``year``. Defaults to the
        notebook-level ``df_model``.

    Returns
    -------
    pandas.DataFrame
        Selected rows of ``songs`` ordered by descending ``combined_rank``; an
        empty frame with the catalogue's columns when nothing matches.

    Raises
    ------
    ValueError
        If ``age`` cannot be converted to an integer.
    """
    max_tracks = 20          # hard cap on the returned playlist
    max_per_artist = 2       # cap per distinct artist_name value
    pool_factor = 3          # oversampling margin for the later filters
    age_pick_count = 5       # extra tracks taken from the listener's youth window

    if songs is None:
        songs = df_model

    # Cluster ids suggested by each context value.
    mood_map = {
        "relax": [0, 2, 4, 5],
        "happy": [1, 4, 7],
        "sad": [2],
        "workout": [3, 6],
        "focus": [0, 5],
        "party": [1, 7],
    }
    activity_map = {
        "study time": [0, 5],
        "walking": [1, 4, 7],
        "running": [6, 3],
        "relaxing": [0, 2],
        "party": [1, 7],
    }
    time_map = {
        "morning": [7, 4],
        "afternoon": [1, 4],
        "evening": [0, 2, 5],
        "night": [0, 5],
    }

    # One vote per appearance of a cluster id across the three maps.
    clusters = [
        *mood_map.get(mood, []),
        *activity_map.get(activity, []),
        *time_map.get(time_of_day, []),
    ]

    if mood == "party" and activity == "party" and time_of_day == 'night':
        # The "night" slot voted for clusters 0 and 5; for an all-party request
        # those votes are swapped for one more vote each for clusters 1 and 7.
        clusters.remove(0)
        clusters.remove(5)
        clusters += [1, 7]

    cluster_votes = Counter(clusters)
    total_votes = len(clusters)
    playlist_length = 10 if mood in ("party", "happy") else 15

    # Validated up front so a malformed age fails for every mood.
    year = datetime.now().year
    birth_year = year - int(age)

    # Collect the candidate pools in a list and concatenate once, instead of
    # growing a DataFrame inside the loop.
    pools = []
    for cluster, votes in cluster_votes.items():
        weight = votes / total_votes
        n_songs = max(1, int(weight * playlist_length))
        pools.append(
            songs[songs["cluster"] == cluster]
            .sort_values("combined_rank", ascending=False)
            .head(n_songs * pool_factor)
        )

    if mood in ("party", "happy"):
        youth_start = birth_year + 15
        youth_end = birth_year + 30
        # Skip the window entirely when it lies in the future.
        if youth_start <= year:
            # Ties between clusters resolve to the first one that received a vote.
            top_cluster = max(cluster_votes, key=cluster_votes.get)
            in_window = (songs["year"] >= youth_start) & (songs["year"] <= min(youth_end, year))
            pools.append(
                songs[(songs["cluster"] == top_cluster) & in_window]
                .sort_values("popularity", ascending=False)
                .head(age_pick_count)
            )

    pools = [pool for pool in pools if not pool.empty]
    if not pools:
        # No votes or no matching rows: return an empty frame that keeps the
        # catalogue's columns rather than failing on a column-less frame.
        return songs.iloc[0:0]

    ranked = (
        pd.concat(pools)
        .drop_duplicates(subset=["track_id"])
        .sort_values("combined_rank", ascending=False)
    )

    # Walk the ranking once and remember row positions; selecting with iloc
    # afterwards keeps the original dtypes and columns.
    keep_positions = []
    taken_per_artist = {}
    for position, artist in enumerate(ranked["artist_name"]):
        already_taken = taken_per_artist.get(artist, 0)
        if already_taken < max_per_artist:
            keep_positions.append(position)
            taken_per_artist[artist] = already_taken + 1
        if len(keep_positions) >= max_tracks:
            break

    return ranked.iloc[keep_positions]


# Example request: party mood, party activity, at night; the age is given as a string and is cast with int() inside the function.
recommend_playlist('party','party','night','24')



Unnamed: 0,track_id,track_name,artist_name,popularity,year,acousticness,danceability,energy,instrumentalness,liveness,...,speechiness,tempo,valence,duration_ms,cluster,distance_to_centroid,distance_normalized,rank_distance,rank_popularity,combined_rank
87959,11VApNQCWLJdzxWrlmwzUa,Say So (feat. Nicki Minaj),"['Doja Cat', 'Nicki Minaj']",89,2020,0.119,0.856,0.641,5e-06,0.427,...,0.204,111.004,0.745,206221,1,276.841432,0.000654,113.0,22.0,0.9978
87981,7wsmIIm0xWmtP7TmACXkJn,Oprah’s Bank Account (Lil Yachty & DaBaby feat...,"['Lil Yachty', 'DaBaby', 'Drake']",83,2020,0.17,0.836,0.617,0.0,0.319,...,0.0534,138.066,0.866,206702,1,205.354543,0.000481,88.0,90.0,0.996624
116670,2lCkncy6bIB0LTMT7kvrD1,Azul,['J Balvin'],86,2020,0.0816,0.843,0.836,0.00138,0.0532,...,0.0695,94.018,0.65,205933,1,565.253728,0.001351,231.0,46.0,0.995479
87552,0KKkJNfGyhkQ5aFogxQAPU,That's What I Like,['Bruno Mars'],81,2016,0.013,0.853,0.56,0.0,0.0944,...,0.0406,134.066,0.86,206693,1,196.009769,0.000458,82.0,149.5,0.995357
144052,6tAqYm2Wcy2yrPixShJMS6,Alone,"['SadBoyProlific', 'Ivri']",69,2018,0.481,0.87,0.341,0.0027,0.329,...,0.439,119.054,0.436,160105,7,766.983352,0.001044,161.0,50.5,0.991479
126052,0PXukVbYpvz40KcEFKnIw7,Don't Rush (feat. Headie One),"['Young T & Bugsey', 'Headie One']",82,2019,0.202,0.961,0.461,0.00402,0.162,...,0.27,108.028,0.324,207640,1,1142.292349,0.002746,423.0,115.0,0.99102
144303,2HnfqILzgKt1CepTlU4oqq,Like No One Does,['Jake Scott'],69,2020,0.352,0.8,0.41,1e-06,0.107,...,0.236,104.941,0.442,158469,7,869.096881,0.001185,179.0,50.5,0.990899
144207,7LzouaWGFCy4tkXDOOnEyM,Liar,['Camila Cabello'],78,2019,0.0169,0.74,0.498,0.00282,0.319,...,0.0456,98.016,0.652,207039,1,541.599266,0.001294,224.5,306.5,0.989634
87581,6g0Orsxv6glTJCt4cHsRsQ,Formation,['Beyoncé'],77,2016,0.00532,0.896,0.621,0.0,0.196,...,0.237,121.966,0.818,206080,1,417.774828,0.000994,174.0,378.0,0.988762
97569,7ycWLEP1GsNjVvcjawXz3z,Praise The Lord (Da Shine) (feat. Skepta),"['A$AP Rocky', 'Skepta']",81,2018,0.0609,0.85,0.569,0.0816,0.1,...,0.136,80.02,0.294,205040,1,1458.220179,0.00351,530.0,149.5,0.988625
