In [1]:
import itertools
import pandas as pd

from recommender_05 import (
    build_target_profile,
    predict_subcluster_from_profile,
    df  # il df globale che hai già caricato lì
)

# Liste di valori che il BOT effettivamente usa o potrebbe usare
MOODS = [
    "happy", "sad", "relaxed", "angry",
    "kids", "christmas", "religious",
    "chill"   # se vuoi usare il mood "chill" oltre che come activity
]

ACTIVITIES = [
    "party", "study", "gym", "commute", "chill", "work", "reading", "run"
]

PARTS = ["morning", "afternoon", "evening", "night"]
WEATHERS = ["sunny", "rainy", "snow", "cloudy", "stormy"]

AGE = 25
EXPLORER = False  # puoi cambiare a True per vedere differenze

def scan_combos():
    rows = []

    for m, a, d, w in itertools.product(MOODS, ACTIVITIES, PARTS, WEATHERS):
        profile, (year_pref, year_low, year_high) = build_target_profile(
            mood=m,
            activity=a,
            weather=w,
            part_of_day=d,
            age=AGE,
            explorer=EXPLORER,
            df_global=df
        )

        subcluster_pred, probs = predict_subcluster_from_profile(profile)

        rows.append({
            "mood": m,
            "activity": a,
            "part_of_day": d,
            "weather": w,
            "subcluster_pred": subcluster_pred
        })

    df_dbg = pd.DataFrame(rows)
    return df_dbg

if __name__ == "__main__":
    df_dbg = scan_combos()

    print("Counts per subcluster:")
    print(df_dbg["subcluster_pred"].value_counts().sort_index())

    # Esempio: mostra tutte le combo che finiscono in 2_4
    print("\nCombos that hit subcluster 2_4:")
    print(df_dbg[df_dbg["subcluster_pred"] == "2_4"].head(20))

    print("\nCombos that hit subcluster 2_5:")
    print(df_dbg[df_dbg["subcluster_pred"] == "2_5"].head(20))

    print("\nCombos that hit subcluster 0_0:")
    print(df_dbg[df_dbg["subcluster_pred"] == "0_0"].head(20))

    print("\nCombos that hit subcluster 0_1:")
    print(df_dbg[df_dbg["subcluster_pred"] == "0_1"].head(20))

DATA_PROCESSED_DIR: /Users/Vitaliano/Desktop/Data Science/FDS - Fundamentals of Data Science/Homework di Gruppo/Final Project/Music-mood pred/data/processed
MODEL_DIR: /Users/Vitaliano/Desktop/Data Science/FDS - Fundamentals of Data Science/Homework di Gruppo/Final Project/Music-mood pred/models
CSV path: /Users/Vitaliano/Desktop/Data Science/FDS - Fundamentals of Data Science/Homework di Gruppo/Final Project/Music-mood pred/data/processed/spotify_dataset_clustered.csv
Counts per subcluster:
subcluster_pred
0_0     30
1_0     30
1_1    654
1_2    405
2_0     67
2_2      5
2_3     32
2_5     57
Name: count, dtype: int64

Combos that hit subcluster 2_4:
Empty DataFrame
Columns: [mood, activity, part_of_day, weather, subcluster_pred]
Index: []

Combos that hit subcluster 2_5:
        mood activity part_of_day weather subcluster_pred
10     happy    party     evening   sunny             2_5
15     happy    party       night   sunny             2_5
50     happy      gym     evening   sunny 

In [4]:
df_dbg["subcluster_pred"].value_counts().sort_index()
df_dbg[df_dbg["subcluster_pred"] == "0_0"].head()
df_dbg[df_dbg["subcluster_pred"] == "1_0"].head()
df_dbg[df_dbg["subcluster_pred"] == "1_2"].head()
df_dbg[df_dbg["subcluster_pred"] == "2_5"].head()

Unnamed: 0,mood,activity,part_of_day,weather,subcluster_pred
10,happy,party,evening,sunny,2_5
15,happy,party,night,sunny,2_5
50,happy,gym,evening,sunny,2_5
55,happy,gym,night,sunny,2_5
150,happy,run,evening,sunny,2_5
