# Requirements

In [None]:
!pip install python-dotenv pandas transformers torch sentence-transformers scikit-learn numpy openai spotipy flask

In [6]:
!pip install uvicorn fastapi watchfiles

Collecting uvicorn
  Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting fastapi
  Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting watchfiles
  Downloading watchfiles-1.1.0-cp313-cp313-win_amd64.whl.metadata (5.0 kB)
Collecting starlette<0.48.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.47.3-py3-none-any.whl.metadata (6.2 kB)
Downloading uvicorn-0.35.0-py3-none-any.whl (66 kB)
Downloading fastapi-0.116.1-py3-none-any.whl (95 kB)
Downloading starlette-0.47.3-py3-none-any.whl (72 kB)
Downloading watchfiles-1.1.0-cp313-cp313-win_amd64.whl (292 kB)
Installing collected packages: watchfiles, uvicorn, starlette, fastapi

   ---------- ----------------------------- 1/4 [uvicorn]
   ---------- ----------------------------- 1/4 [uvicorn]
   ---------- ----------------------------- 1/4 [uvicorn]
   -------------------- ------------------- 2/4 [starlette]
   -------------------- ------------------- 2/4 [starlette]
   -------------------- ------

# Spotify Data Extraction

The first step is to use the prepared Spotify module to extract the data and save it in an SQLite file. (Alternatively, you can configure a MySQL file using the mysql.py wrapper in the libraries folder, or create your own).

In [2]:
import os
import time
import threading
import webbrowser
from typing import Optional, Dict, Any

from flask import Flask, request

# Spotify logic (PKCE-only helpers and iterators)
from libraries.spotify import get_spotify_client, iter_saved_tracks

# SQLite backend
from libraries.sqlite import (
    create_connection,
    ensure_database_and_tables,
    upsert_artist,
    upsert_album,
    upsert_track,
)

# =========================
# Configuration (edit here)
# =========================
CLIENT_ID: str = "ec6c43ef03034d7393a4906cd1df5f06"         # do not commit real values to public repos
REDIRECT_URI: str = "https://spotify-auth.viant.dev/callback"  # must match Spotify dashboard
SCOPE: str = "user-library-read"
CACHE_PATH: str = ".cache"                        # per-user cache file
LOCAL_CALLBACK_PORT: int = 8080                  # must match your Vercel relay target (127.0.0.1:<PORT>)

# =========================
# Server state (global)
# =========================
SERVER_STATE: Dict[str, Optional[Any]] = {"server": None, "thread": None}


def build_spotify_pkce(
    client_id: str,
    redirect_uri: str,
    scope: str,
    cache_path: str,
    open_browser: bool = False,
):
    """Return a PKCE auth manager (Spotipy SpotifyPKCE)."""
    from spotipy.oauth2 import SpotifyPKCE  # PKCE class (no client_secret)
    return SpotifyPKCE(
        client_id=client_id,
        redirect_uri=redirect_uri,
        scope=scope,
        open_browser=open_browser,  # we manually open the browser
        cache_path=cache_path,
    )


def wait_for_token(auth_mgr, timeout: int = 300, interval: float = 1.0) -> bool:
    """Poll the cache until a valid token exists or timeout expires."""
    start = time.time()
    while time.time() - start < timeout:
        try:
            token = auth_mgr.get_cached_token()
            if token and token.get("access_token"):
                return True
        except Exception:
            # Some spotipy versions may raise if cache is empty; ignore and keep polling
            pass
        time.sleep(interval)
    return False


def run_server(app: Flask) -> None:
    """Run Flask using a Werkzeug WSGI server we can stop programmatically."""
    from werkzeug.serving import make_server
    httpd = make_server("127.0.0.1", LOCAL_CALLBACK_PORT, app)
    SERVER_STATE["server"] = httpd
    # Blocks current thread; intended to be called from a background thread.
    httpd.serve_forever()


def stop_server() -> None:
    """Stop the local auth callback server if it is running."""
    server = SERVER_STATE.get("server")
    thread = SERVER_STATE.get("thread")

    if server is None:
        print("ℹ️ No server is currently running.")
        return

    try:
        server.shutdown()  # Properly shut down the Werkzeug server
    except Exception as e:
        print(f"⚠️ Failed to shutdown server cleanly: {e}")

    if thread and thread.is_alive():
        thread.join(timeout=3)

    SERVER_STATE.update({"server": None, "thread": None})
    print("🛑 Server stopped.")


def ensure_pkce_authorized(client_id: str, redirect_uri: str, scope: str, cache_path: str) -> None:
    """
    Ensure PKCE flow is completed.
    If a valid token is already cached, nothing happens.
    Otherwise:
      - starts a local Flask server on 127.0.0.1:<PORT>/callback
      - opens the Spotify consent page
      - exchanges the 'code' and stores tokens in cache
      - blocks until the token is available or timeout
    """
    auth = build_spotify_pkce(
        client_id=client_id,
        redirect_uri=redirect_uri,
        scope=scope,
        cache_path=cache_path,
        open_browser=False,
    )

    # If token already cached, skip auth
    try:
        token = auth.get_cached_token()
        if token and token.get("access_token"):
            return
    except Exception:
        pass

    app = Flask(__name__)

    @app.route("/callback")
    def callback():
        code = request.args.get("code")
        if not code:
            return "Missing authorization code.", 400
        # Complete token exchange and persist to cache
        auth.get_access_token(code, check_cache=False)
        return "Auth OK. You can close this tab."

    # Start local receiver with a stoppable server and open consent page
    t = threading.Thread(target=run_server, args=(app,), daemon=True)
    t.start()
    SERVER_STATE["thread"] = t

    auth_url = auth.get_authorize_url()
    webbrowser.open_new_tab(auth_url)
    print(f"Open this URL if the browser did not pop up:\n{auth_url}")
    print(f"Waiting for callback on http://127.0.0.1:{LOCAL_CALLBACK_PORT}/callback ...")

    # Block until token is present (or timeout)
    ok = wait_for_token(auth, timeout=300, interval=1.0)

    # Always stop the local server to free the port
    stop_server()

    if not ok:
        raise TimeoutError("PKCE authorization timed out. Try again or check redirect/callback setup.")


def run_import(client_id: str, redirect_uri: str, scope: str, cache_path: str) -> None:
    """Fetch saved tracks from Spotify and persist them using selected backend."""
    # Build a Spotify client using the same config used for PKCE cache
    sp = get_spotify_client(
        client_id=client_id,
        redirect_uri=redirect_uri,
        scope=scope,
        cache_path=cache_path,
        open_browser=False,   # no need; token already cached
    )

    conn = create_connection()
    ensure_database_and_tables(conn)

    count = 0
    for row in iter_saved_tracks(sp, page_limit=50):
        a = row["artist"]
        al = row["album"]
        t = row["track"]

        upsert_artist(conn, a["spotify_id"], a["name"])
        upsert_album(
            conn,
            al["spotify_id"],
            al["name"],
            al["artist_spotify_id"],
            al["image_url"],
            al["image_height"],
            al["image_width"],
        )
        upsert_track(
            conn,
            t["spotify_id"],
            t["name"],
            t["popularity"],
            t["href"],
            t["artist_spotify_id"],
            t["album_spotify_id"],
        )
        count += 1

    conn.close()
    print(f"Imported or updated {count} tracks.")

# =========================
# Entry point
# =========================
if __name__ == "__main__":
    print("Authorizing...")

    # 1) Complete PKCE (opens browser if cache is empty, waits until token is stored)
    ensure_pkce_authorized(
        client_id=CLIENT_ID,
        redirect_uri=REDIRECT_URI,
        scope=SCOPE,
        cache_path=CACHE_PATH,
    )

    print("Start to import tracks")

    # 2) Run the import using the cached token
    run_import(
        client_id=CLIENT_ID,
        redirect_uri=REDIRECT_URI,
        scope=SCOPE,
        cache_path=CACHE_PATH,
    )


Authorizing...
Open this URL if the browser did not pop up:
https://accounts.spotify.com/authorize?client_id=ec6c43ef03034d7393a4906cd1df5f06&response_type=code&redirect_uri=https%3A%2F%2Fspotify-auth.viant.dev%2Fcallback&code_challenge_method=S256&code_challenge=riEaLZ0dtNY6ejcew5Dsa2hqccX_rvRgsakgm5T7Uzs&scope=user-library-read
Waiting for callback on http://127.0.0.1:8080/callback ...


127.0.0.1 - - [06/Sep/2025 20:01:02] "GET /callback?code=AQCkvtuK3dkJrJl6e5a3K5ztPfY-1wWfpNFyIrbV-62taOLSEugvOG8ZPMoD_gnd78QD0299F26gsquddZ4qygaYrkhBlIcN0RBF76JJBjJkaAVHDzF_bAcv5svn6gQoQNDRQLzZFARM-zBvLI4b2N_Z2is9fJVKxFfzg0A_swyDeeSR0AuudJJrwqvXzz-lsdVlEncI4uNaSaEtXL5XzHOOJZAhvTJMVk3x0lZc7kkrS3AfNHUDdRww6TKuk5PdMcm7jIQER9t3L52XCNMgBFg HTTP/1.1" 200 -


🛑 Server stopped.
Start to import tracks
Imported or updated 911 tracks.


# Keywords Extraction

With our Spotify data loaded, we proceed to extract our playlist (currently limited to only 1,000 songs, but it will still give us a broad view of what we listen to). We’ll do this through an LLM. Remember to add your API key to your .env file under OPENAI_API_KEY. This process may take several minutes to complete.

What we did in this step was use AI to generate keywords from the song title and name. Ideally, this should be done with the lyrics, but since there is no public source for them, AI can help us instead.

If you want to use the lyrics for a more accurate analysis, use the `extract_from_lyrics` function inside `extract_keywords`. To do so, you must have populated the database with the lyrics in the `lyrics` field of the `Tracks` table.

### Cost Estimate

The average cost of processing 1,000 records in OpenAI is approximately **$0.40 USD or less**, depending on your dataset.

In [None]:
import extract_keywords.extract_from_title_artist as keywords

keywords.extract_keywords_from_title_artist()

# Sentimental Analysis for Spotify Playlist

## Exploratory Data Analysis

In [4]:
import libraries.sqlite as db
import pandas as pd

We retrieve the data from the database

In [5]:
conn = db.create_connection()
tracks = db.fetch_tracks_dataset(conn)
conn.close()
print(len(tracks))

911


We build the dataframe to better analyze the data.

In [6]:
df = pd.DataFrame(tracks, columns=["track_spotify_id", "artist_name", "title", "keywords"])
df.head(10)

Unnamed: 0,track_spotify_id,artist_name,title,keywords
0,00AoRZ8103mVeOVfpTfGuR,Luny Tunes,Mayor Que Yo 3,"[romantic tension, reggaeton vibe, urban night..."
1,00BuKLSAFkaEkaVAgIMbeA,Lady Gaga,Telephone,"[communication, urgency, escape, party, connec..."
2,00i0O74dXdaKKdCrqHnfXm,Ricky Martin,La Mordidita (feat. Yotuel),"[passion, dance, seduction, energy, rhythm, La..."
3,00iQcGMeC6agUvBjHdkAAM,La Oreja de Van Gogh,Un Cuento Sobre el Agua,"[nostalgia, reflection, melancholy, love, memo..."
4,017PF4Q3l4DBUiWoXk4OWT,Dua Lipa,Break My Heart,"[heartbreak, love, vulnerability, emotional pa..."
5,01YedSX5OYtSCWdO1DRhvY,La Oreja de Van Gogh,20 de Enero,"[love, memory, nostalgia, heartbreak, reflecti..."
6,01uqI4H13Gsd8Lyl1EYd8H,Macklemore & Ryan Lewis,Same Love (feat. Mary Lambert),"[equality, love, acceptance, social justice, L..."
7,02JIdsrod3BYucThfUFDUX,Camille,Le Festin,"[hope, dreams, perseverance, joy, inspiration,..."
8,02XnQdf7sipaKBBHixz3Zp,Lady Gaga,Paparazzi,"[obsession, fame, media, love, danger, pop, da..."
9,02bKaAG61tMw9c63fzKXal,Alex Ubago,Sin miedo a nada (feat. Amaia Montero),"[fearless, love, courage, hope, commitment, ro..."


Now we are going to create a dictionary based on the main keywords to normalize them. Using embeddings, we can adjust the threshold to make the group larger or smaller—the lower the number, the larger the group. In this case, I set it to 0.4.

In [7]:
import itertools
import json
from sentence_transformers import SentenceTransformer
from sklearn.cluster import AgglomerativeClustering

# ========= 1) Create a unique keywords list =========
all_kws = list(itertools.chain.from_iterable(df['keywords']))
unique_kws = sorted({
    " ".join(str(k).lower().split())
    for k in all_kws
    if isinstance(k, str) and k.strip()
})

print(f"Total unique keywords: {len(unique_kws)}")

# ========= 2) Embeddings =========
model = SentenceTransformer('all-MiniLM-L6-v2')
emb = model.encode(unique_kws, normalize_embeddings=True)

# ========= 3) Hierarchical cluster =========
clu = AgglomerativeClustering(
    n_clusters=None,
    metric='cosine',
    linkage='average',
    distance_threshold=0.4  # ajusta el threshold: más pequeño = clusters más finos
)
labels = clu.fit_predict(emb)

# ========= 4) Build the clusters =========
clusters = {}
for kw, lab in zip(unique_kws, labels):
    clusters.setdefault(lab, []).append(kw)


# ========= 5) Choose canonical representative per cluster =========
def pick_rep(words):
    # heurística: palabra con menos tokens, si empata, la más corta
    return sorted(words, key=lambda w: (len(w.split()), len(w)))[0]


suggested_norm = {}
for words in clusters.values():
    rep = pick_rep(words)
    for w in words:
        suggested_norm[w] = rep

# ========= 6) Save suggested dictionary =========
with open("keyword_norm_suggested.json", "w", encoding="utf-8") as f:
    json.dump(suggested_norm, f, indent=2, ensure_ascii=False)

print("Example of normalized (First 20):")
for k, v in list(suggested_norm.items())[:20]:
    print(f"{k} -> {v}")


  from .autonotebook import tqdm as notebook_tqdm


Total unique keywords: 1637
Example of normalized (First 20):
1960s -> 70s
1970s -> 70s
1970s style -> 70s
1970s vibe -> 70s
70s -> 70s
1980s -> 80s
1980s vibe -> 80s
80s -> 80s
80s pop -> 80s
80s pop rock -> 80s
80s rock -> 80s
80s style -> 80s
80s vibe -> 80s
90s -> 80s
90s alternative -> 80s
90s vibe -> 80s
90s-2000s vibe -> 80s
1980s influence -> 80s influence
80s influence -> 80s influence
2000s -> 2000s


Count unique normalized keywords (the canonical representatives). If the number of suggested words is considered optimal compared to the number of unique words, we can proceed; otherwise, we adjust the threshold.

In [8]:
unique_norm = set(suggested_norm.values())
print(f"{len(unique_kws)} unique words - suggested words {len(unique_norm)}")

1637 unique words - suggested words 845


Keeps the original 'keywords' column and adds a new column
    'normalized_keywords' with normalized keywords as a list (unique, no duplicates).

In [9]:
def apply_normalization_array(df: pd.DataFrame, norm_dict: dict) -> pd.DataFrame:
    df_copy = df.copy()
    df_copy["normalized_keywords"] = df_copy["keywords"].apply(
        lambda kws: list({
            norm_dict.get(str(k).lower().strip(), str(k).lower().strip())
            for k in kws if isinstance(k, str) and k.strip()
        })
    )
    return df_copy


df_norm = apply_normalization_array(df, suggested_norm)
df_norm[["title", "artist_name", "keywords", "normalized_keywords"]].head(5)

Unnamed: 0,title,artist_name,keywords,normalized_keywords
0,Mayor Que Yo 3,Luny Tunes,"[romantic tension, reggaeton vibe, urban night...","[modern, party, romance, flirty, interaction, ..."
1,Telephone,Lady Gaga,"[communication, urgency, escape, party, connec...","[escape, electro, modern, vibrant, party, rela..."
2,La Mordidita (feat. Yotuel),Ricky Martin,"[passion, dance, seduction, energy, rhythm, La...","[fun, vibrant, love, party, flirty, nightlife,..."
3,Un Cuento Sobre el Agua,La Oreja de Van Gogh,"[nostalgia, reflection, melancholy, love, memo...","[melody, love, rain imagery, romance, moment, ..."
4,Break My Heart,Dua Lipa,"[heartbreak, love, vulnerability, emotional pa...","[modern, love, regret, relationship, romance, ..."


We can see the top keywords, and if we want, we can go back to the dictionary creation step to re-normalize with another threshold (this is the key).

In [10]:
import itertools
from collections import Counter


def top_normalized_keywords(df: pd.DataFrame, top_n: int = 20) -> pd.DataFrame:
    """
    Returns the top N most frequent normalized keywords across all tracks.
    """
    all_kws = list(itertools.chain.from_iterable(df["normalized_keywords"]))
    counter = Counter(all_kws)
    top = counter.most_common(top_n)
    return pd.DataFrame(top, columns=["keyword", "count"])


top_keywords_df = top_normalized_keywords(df_norm, top_n=20)
top_keywords_df


Unnamed: 0,keyword,count
0,emotion,642
1,passion,459
2,mood,450
3,energy,441
4,love,432
5,romance,427
6,pop,409
7,rhythm,389
8,desire,380
9,melody,348


To simplify the process, we will save the DataFrame with normalized keywords into a CSV file.

In [11]:
df_norm.to_csv("tracks_with_normalized_keywords.csv", index=False, encoding="utf-8")

We also save our normalized keywords in the database to ensure persistence.

In [12]:
conn = db.create_connection()
db.ensure_normalized_keywords_column(conn)
db.update_normalized_keywords(conn, df_norm)
conn.close()

Column 'normalized_keywords' already exists.
911 rows updated.


## Sentimental Analysis

Utilities functions

In [13]:
import os
from transformers import pipeline
import json
import ast
import pandas as pd
from collections import defaultdict
import libraries.sqlite as db

# -------------------------
# Utilities
# -------------------------
def load_df_if_needed(df: pd.DataFrame | None = None,
                      csv_path: str = "tracks_with_normalized_keywords.csv") -> pd.DataFrame:
    """
    Returns a DataFrame ready for processing.
    If df is None OR df does not contain 'normalized_keywords', tries to load from CSV.
    """
    if df is None or "normalized_keywords" not in df.columns:
        if not os.path.exists(csv_path):
            raise FileNotFoundError(
                f"DataFrame is missing and '{csv_path}' was not found. "
                "Provide a DataFrame with columns: track_spotify_id, artist_name, title, keywords (list)."
            )
        df = pd.read_csv(csv_path)
    return df


def ensure_keywords_list(df: pd.DataFrame, col: str = "keywords") -> pd.DataFrame:
    """
    Robustly converts the column `col` into a Python list[str] per row.
    Handles:
      - JSON lists: ["love","party"]
      - Python repr lists: ['love', 'party']
      - Double-escaped JSON (e.g., '"[\"love\",\"party\"]"')
      - Comma-separated fallbacks: love, party
      - NaN/None → []
    """
    if col not in df.columns:
        raise KeyError(f"Column '{col}' not found in DataFrame.")

    def _strip_outer_quotes(s: str) -> str:
        # Remove a single pair of wrapping quotes if present: '"[...]' or "'[...]'"
        if len(s) >= 2 and ((s[0] == s[-1] == '"') or (s[0] == s[-1] == "'")):
            return s[1:-1]
        return s

    def _parse(x):
        # Already a list
        if isinstance(x, list):
            return [str(t).strip() for t in x if isinstance(t, (str, int, float)) and str(t).strip()]

        # Missing
        if x is None or (isinstance(x, float) and pd.isna(x)):
            return []

        # String-like
        if isinstance(x, str):
            s = x.strip()
            if not s:
                return []

            # Try to un-wrap if double-escaped: e.g., '"[\"love\",\"party\"]"'
            s_unwrapped = _strip_outer_quotes(s)

            # 1) Try JSON
            for candidate in (s, s_unwrapped):
                if candidate.startswith("[") and candidate.endswith("]"):
                    try:
                        v = json.loads(candidate)
                        if isinstance(v, list):
                            return [str(t).strip() for t in v if str(t).strip()]
                    except Exception:
                        pass

            # 2) Try Python literal (handles single quotes lists)
            for candidate in (s, s_unwrapped):
                if candidate.startswith("[") and candidate.endswith("]"):
                    try:
                        v = ast.literal_eval(candidate)
                        if isinstance(v, list):
                            return [str(t).strip() for t in v if str(t).strip()]
                    except Exception:
                        pass

            # 3) Fallback: comma-separated
            parts = [p.strip().strip("'").strip('"') for p in s.split(",")]
            return [p for p in parts if p]

        # Anything else → try stringify
        try:
            s = str(x).strip()
            if s.startswith("[") and s.endswith("]"):
                v = ast.literal_eval(s)
                if isinstance(v, list):
                    return [str(t).strip() for t in v if str(t).strip()]
        except Exception:
            pass
        return []

    out = df.copy()
    out[col] = out[col].apply(_parse)
    return out


# -------------------------
# Emotion analysis (model-based, no lexicon)
# -------------------------
# Load a pre-trained emotion classifier (GoEmotions fine-tuned DistilRoBERTa)
# 1) Multi-label emotion pipeline (GoEmotions)
_emotion_pipe = None


def get_emotion_pipeline_multilabel():
    """
    Multi-label emotion classifier based on GoEmotions.
    Uses sigmoid (not softmax) under the hood.
    """
    global _emotion_pipe
    if _emotion_pipe is None:
        _emotion_pipe = pipeline(
            "text-classification",
            model="joeddav/distilbert-base-uncased-go-emotions-student",
            return_all_scores=True,
            top_k=None,  # return all labels with scores
            truncation=True
        )
    return _emotion_pipe


# 2) Utility: classify a list of keywords IN BATCH, then aggregate per label
def analyze_emotion_from_keywords_multilabel(
        keywords: list[str],
        min_score_threshold: float = 0.15,  # keep labels with mean score >= threshold
        top_k: int = 5,  # return top-k emotions after threshold
        batch_size: int = 32,
        max_keywords: int | None = 100  # cap to avoid very long batches
) -> list[dict]:
    """
    Multi-label emotion analysis:
      - Classifies each keyword separately (batched).
      - Averages scores per emotion across keywords.
      - Filters by threshold and returns top_k labels.

    Returns a list of dicts: [{"label": "...", "score": 0.xx}, ...]
    """
    if not keywords:
        return []

    # Clean and cap keywords
    kws = [str(k).strip() for k in keywords if isinstance(k, str) and str(k).strip()]
    if not kws:
        return []
    if max_keywords is not None:
        kws = kws[:max_keywords]

    pipe = get_emotion_pipeline_multilabel()

    # Batch inference
    all_scores = pipe(kws, batch_size=batch_size)  # list of list[{"label","score"}]

    # Aggregate scores per label (mean over keywords)
    # Initialize label space from the first result
    if not all_scores or not all_scores[0]:
        return []

    label_scores = defaultdict(list)
    labels = [d["label"] for d in all_scores[0]]
    for per_kw in all_scores:
        for d in per_kw:
            label_scores[d["label"]].append(float(d["score"]))

    mean_scores = {lab: (sum(vals) / len(vals)) for lab, vals in label_scores.items()}

    # Threshold and sort
    filtered = [{"label": lab, "score": sc} for lab, sc in mean_scores.items() if sc >= min_score_threshold]
    filtered.sort(key=lambda x: x["score"], reverse=True)

    # Take top_k (if threshold filters too much and nothing remains, fall back to top_k without threshold)
    if not filtered:
        fallback = [{"label": lab, "score": sc} for lab, sc in mean_scores.items()]
        fallback.sort(key=lambda x: x["score"], reverse=True)
        return fallback[:top_k]
    return filtered[:top_k]


# 3) Attach emotions to your DataFrame using the ORIGINAL 'keywords' column
def attach_emotions_multilabel(df: pd.DataFrame, top_k: int = 5) -> pd.DataFrame:
    """
    Adds 'emotions' column with the aggregated multi-label results
    computed from the 'keywords' column for each row.
    """
    df_out = df.copy()
    df_out["emotions"] = df_out["keywords"].apply(
        lambda kws: analyze_emotion_from_keywords_multilabel(
            kws,
            min_score_threshold=0.15,
            top_k=top_k
        )
    )
    return df_out

We are going to perform sentiment analysis. To do this, we’ll use a pre-trained model: joeddav/distilbert-base-uncased-go-emotions-student. This model will help us extract the emotions from our keywords. We can define how many emotions we want to retrieve by setting the top_k parameter in our attach_emotions_multilabel function. Depending on this value, we can decide how many emotions to visualize and how deep we want the analysis to be.

In [14]:
# If you already have a DataFrame named `df`, pass it directly to attach_emotions(df, top_k=3).
# Otherwise, load from CSV (expects at least: track_spotify_id, artist_name, title, keywords):
try:
    df = load_df_if_needed(df=None, csv_path="tracks_with_normalized_keywords.csv")
except FileNotFoundError as e:
    print(e)
    raise

# Ensure keywords are lists, then attach emotions
df = ensure_keywords_list(df, col="keywords")
df = attach_emotions_multilabel(df, top_k=28)  # Top K depends on how many emotions the dataset can describe.

# Inspect result (emotions column contains top-3 emotions with scores per track)
df[["track_spotify_id", "artist_name", "title", "emotions"]].head(10)

Device set to use cpu


Unnamed: 0,track_spotify_id,artist_name,title,emotions
0,00AoRZ8103mVeOVfpTfGuR,Luny Tunes,Mayor Que Yo 3,"[{'label': 'excitement', 'score': 0.1183473470..."
1,00BuKLSAFkaEkaVAgIMbeA,Lady Gaga,Telephone,"[{'label': 'realization', 'score': 0.078663845..."
2,00i0O74dXdaKKdCrqHnfXm,Ricky Martin,La Mordidita (feat. Yotuel),"[{'label': 'excitement', 'score': 0.1284560971..."
3,00iQcGMeC6agUvBjHdkAAM,La Oreja de Van Gogh,Un Cuento Sobre el Agua,"[{'label': 'caring', 'score': 0.09976261936128..."
4,017PF4Q3l4DBUiWoXk4OWT,Dua Lipa,Break My Heart,"[{'label': 'desire', 'score': 0.09165769649669..."
5,01YedSX5OYtSCWdO1DRhvY,La Oreja de Van Gogh,20 de Enero,"[{'label': 'caring', 'score': 0.09090986732393..."
6,01uqI4H13Gsd8Lyl1EYd8H,Macklemore & Ryan Lewis,Same Love (feat. Mary Lambert),"[{'label': 'realization', 'score': 0.136109698..."
7,02JIdsrod3BYucThfUFDUX,Camille,Le Festin,"[{'label': 'optimism', 'score': 0.102420230284..."
8,02XnQdf7sipaKBBHixz3Zp,Lady Gaga,Paparazzi,"[{'label': 'excitement', 'score': 0.0882992495..."
9,02bKaAG61tMw9c63fzKXal,Alex Ubago,Sin miedo a nada (feat. Amaia Montero),"[{'label': 'caring', 'score': 0.14290625721216..."


We verify what we obtained.

In [15]:
# Get the first non-empty emotions entry
first_non_empty = df.loc[df["emotions"].map(lambda x: isinstance(x, list) and len(x) > 0), "emotions"].iloc[0]
second = df.loc[df["emotions"].map(lambda x: isinstance(x, list) and len(x) > 0), "emotions"].iloc[1]

# Pretty print as JSON for readability
print(json.dumps(first_non_empty, indent=2))
print(json.dumps(second, indent=2))


[
  {
    "label": "excitement",
    "score": 0.11834734700620174
  },
  {
    "label": "desire",
    "score": 0.11584901049733162
  },
  {
    "label": "realization",
    "score": 0.06584796600043774
  },
  {
    "label": "curiosity",
    "score": 0.056092655062675474
  },
  {
    "label": "pride",
    "score": 0.04987131379544735
  },
  {
    "label": "caring",
    "score": 0.04905091166496277
  },
  {
    "label": "joy",
    "score": 0.04893150057643652
  },
  {
    "label": "approval",
    "score": 0.045935837030410764
  },
  {
    "label": "amusement",
    "score": 0.04559797925874591
  },
  {
    "label": "love",
    "score": 0.04034377865493297
  },
  {
    "label": "relief",
    "score": 0.03395199995487928
  },
  {
    "label": "optimism",
    "score": 0.032062261253595355
  },
  {
    "label": "admiration",
    "score": 0.03173048753291369
  },
  {
    "label": "surprise",
    "score": 0.028245389647781848
  },
  {
    "label": "nervousness",
    "score": 0.02772440867498517


We will denormalize, create an appropriate table, and insert the extracted emotions for later analysis.

1. Discover unique emotions from df

In [16]:
import re


def all_emotions_from_df(df):
    """
    Returns a sorted list of unique emotion labels present in df['emotions'].
    """
    labels = set()
    for lst in df["emotions"]:
        if isinstance(lst, list):
            for d in lst:
                lab = str(d.get("label", "")).strip().lower()
                if lab:
                    labels.add(lab)
    return sorted(labels)


def to_snake(s: str) -> str:
    """
    Safe snake_case for MySQL column names.
    """
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "_", s)
    return re.sub(r"_+", "_", s).strip("_")


emotions = all_emotions_from_df(df)
emotion_cols = [to_snake(e) for e in emotions]
print(len(emotion_cols), "emotions discovered")


28 emotions discovered


 2. Build DDL for a wide, denormalized table

In [17]:
# Example: run once in MySQL
table_name = "track_emotions_wide"
ddl = db.build_create_table_sql(table_name, emotion_cols)

3. Pivot df to wide (percentages per emotion)

In [18]:
import pandas as pd


def df_to_emotion_wide(df: pd.DataFrame, emotions: list[str]) -> pd.DataFrame:
    """
    Returns a wide DataFrame with one column per emotion (percentage 0–100),
    filling 0 where the emotion is absent.
    """
    cols = ["track_spotify_id"] + [to_snake(e) for e in emotions]
    out_rows = []
    for _, row in df.iterrows():
        base = {c: 0.0 for c in cols}
        base["track_spotify_id"] = str(row["track_spotify_id"]).strip()
        lst = row["emotions"] if isinstance(row["emotions"], list) else []
        for d in lst:
            lab = str(d.get("label", "")).strip().lower()
            score = round(float(d.get("score", 0.0) * 100), 4)
            col = to_snake(lab)
            if col in base:
                val = score  # store as percentage
                # if multiple entries for the same label, keep max
                base[col] = max(base[col], val)
        out_rows.append(base)
    wide = pd.DataFrame(out_rows, columns=cols)
    return wide


wide_df = df_to_emotion_wide(df, emotions)
print(f"Value: {wide_df.iloc[0, 1:].sum()}")  # The value needs to be ~= 100
wide_df.head()


Value: 100.0


Unnamed: 0,track_spotify_id,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,love,nervousness,neutral,optimism,pride,realization,relief,remorse,sadness,surprise
0,00AoRZ8103mVeOVfpTfGuR,3.173,4.5598,2.3774,2.26,4.5936,4.9051,2.3211,5.6093,11.5849,...,4.0344,2.7724,2.1814,3.2062,4.9871,6.5848,3.3952,1.7274,1.0017,2.8245
1,00BuKLSAFkaEkaVAgIMbeA,2.1868,4.2053,3.8345,4.1169,3.6588,6.3703,3.1063,4.2218,4.7444,...,2.9926,4.9757,2.2962,3.8949,4.0619,7.8664,4.8894,2.3435,1.4643,2.7717
2,00i0O74dXdaKKdCrqHnfXm,3.0423,6.3857,1.1765,1.5285,4.3693,5.3163,1.6337,4.5355,8.1992,...,4.1455,1.5365,1.8562,7.487,5.0033,5.1344,3.466,1.1589,0.6413,2.7813
3,00iQcGMeC6agUvBjHdkAAM,3.2848,3.6995,1.1481,1.509,4.0445,9.9763,1.6346,5.2694,7.3965,...,5.9853,1.9932,2.5723,4.2105,3.6533,6.726,6.6287,3.2235,4.3334,2.3926
4,017PF4Q3l4DBUiWoXk4OWT,2.0447,3.3372,1.7681,1.9308,3.118,5.5102,4.2814,4.4483,9.1658,...,4.8037,2.3928,1.5849,5.3296,3.2392,5.0693,2.8545,4.4973,5.9266,2.2279


4. Bulk upsert into MySQL (denormalized table)

In [19]:
conn = db.create_connection()
try:
    db.truncate_track_emotions_wide(conn)
except:
    print("Tabla no truncada")

db.execute_DDL(conn,ddl)
db.upsert_emotion_wide(conn, table_name, wide_df)
conn.close()

DDL ejecutado con éxito.


We store the extracted emotions in the track table of the database for added reliability.

In [20]:
conn = db.create_connection()
db.update_track_emotions(conn, df)
conn.close()

Updated emotions for 911 tracks.


## Emotion Dictionary Creation

In this step, we will **extract the emotion columns from `wide_df`**, use the language model (LLM) to classify them as **Positive, Neutral, or Negative**, and also assign a representative **emoji**.

With this information, we will build an **emotion dictionary** and store it in the database in the `emotions_dictionary` table (creating or truncating it beforehand).

In [21]:
import os
from dotenv import load_dotenv

# Carga las variables del archivo .env en el entorno
load_dotenv()

# Ahora puedes verificar que la clave está disponible
print("✅ OPENAI_API_KEY found:", bool(os.getenv("OPENAI_API_KEY")))

✅ OPENAI_API_KEY found: True


In [22]:
# =========================
# Emotions Dictionary: wide_df -> LLM -> DB
# =========================
# Requisitos:
#   - pip install openai python-dotenv pymysql
#   - .env con OPENAI_API_KEY=<tu_key>
#   - objeto 'db' con db.create_connection() -> PyMySQL connection
#   - DataFrame 'wide_df' ya en memoria
#
# Qué hace:
#   1) Extrae emociones desde wide_df.columns (excluye 'track_spotify_id')
#   2) Pide al LLM: emotion -> {emotion, normalize (Positive|Neutral|Negative), emoji}
#   3) Crea/TRUNCATE emotions_dictionary
#   4) Inserta mapping en MySQL
#   5) Devuelve el mapping

import os
import re
import json
import time
from typing import List, Dict, Any, Optional

import libraries.sqlite as db

# 1) Cargar .env (si existe)
try:
    from dotenv import load_dotenv

    load_dotenv()
except Exception:
    pass

# 2) Cliente OpenAI (lee OPENAI_API_KEY del entorno)
from openai import OpenAI

_client = OpenAI()  # si no existe la var, lanzará error al invocar


# ---------- Utilidades ----------

def _parse_json_lenient(text: str) -> Any:
    """
    Intenta parsear JSON aunque el modelo devuelva texto extra o ```json fences.
    """
    if text is None:
        raise ValueError("Empty LLM response.")
    cleaned = text.strip()

    # quitar fences ```json ... ```
    cleaned = re.sub(r"^```(?:json|JSON)?\s*", "", cleaned)
    cleaned = re.sub(r"\s*```$", "", cleaned)

    # intento directo
    try:
        return json.loads(cleaned)
    except Exception:
        pass

    # buscar objeto {...}
    s, e = cleaned.find("{"), cleaned.rfind("}")
    if s != -1 and e != -1 and s < e:
        try:
            return json.loads(cleaned[s:e + 1])
        except Exception:
            pass

    # buscar array [...]
    s, e = cleaned.find("["), cleaned.rfind("]")
    if s != -1 and e != -1 and s < e:
        try:
            return json.loads(cleaned[s:e + 1])
        except Exception:
            pass

    raise ValueError(f"Could not parse JSON from LLM output. Got:\n{cleaned[:500]}")


def _canon_norm(label: str) -> str:
    """
    Normaliza etiquetas a Positive|Neutral|Negative (acepta inglés/español/minúsculas).
    """
    if not label:
        return "Neutral"
    l = label.strip().lower()
    if l.startswith(("pos", "poz", "positivo")):
        return "Positive"
    if l.startswith(("neu", "neutro")):
        return "Neutral"
    if l.startswith(("neg", "negativo")):
        return "Negative"
    return "Neutral"


# ---------- LLM ----------

def llm_classify_emotions_with_openai(
        emotions: List[str],
        model: str = "gpt-4o-mini",
        temperature: float = 0.0,
        max_retries: int = 3,
        dry_run: bool = False,
        debug_print: bool = False,
) -> List[Dict[str, Any]]:
    """
    Pide al chat.completions un JSON con:
    [ {emotion, normalize (FP:Full Positive|MP: Positive|N:Neutral|MN:Mid Negative|FN: Full Negative), emoji}, ... ]
    Usa parser tolerante para extraer el JSON.
    """
    if not emotions:
        return []
    if dry_run:
        return [{"emotion": e, "normalize": "N", "emoji": "❓"} for e in emotions]

    system_msg = '''
    You are a precise JSON generator.
    For each input emotion, return a JSON array of objects with the following keys:
    - "emotion": the emotion name,
    - "normalize": one of ["FP" (Full Positive), "MP" (Mid Positive), "N" (Neutral), "MN" (Mid Negative), "FN" (Full Negative)],
    - "emoji": a single representative emoji.

    Return ONLY a valid JSON array, with no explanations and no markdown.
    '''
    user_msg = "Emotions:\n" + json.dumps(emotions, ensure_ascii=False)

    last_text: Optional[str] = None
    for attempt in range(1, max_retries + 1):
        try:
            resp = _client.chat.completions.create(
                model=model,
                temperature=temperature,
                messages=[
                    {"role": "system", "content": system_msg},
                    {"role": "user", "content": user_msg},
                ],
                max_tokens=1200,
            )
            text = resp.choices[0].message.content
            last_text = text
            parsed = _parse_json_lenient(text)

            # Acepta lista directa o objeto con "result"
            if isinstance(parsed, dict) and "result" in parsed and isinstance(parsed["result"], list):
                items = parsed["result"]
            elif isinstance(parsed, list):
                items = parsed
            else:
                raise ValueError("Parsed JSON is not a list nor an object with 'result'.")

            out: List[Dict[str, Any]] = []
            for row in items:
                if not isinstance(row, dict):
                    continue
                emo = str(row.get("emotion", "")).strip()
                norm = str(row.get("normalize", "")).strip()
                emoji = str(row.get("emoji", "")).strip()
                if emo and emoji:
                    out.append({"emotion": emo, "normalize": norm, "emoji": emoji})

            if not out:
                raise ValueError("Empty mapping after validation.")
            return out

        except Exception as e:
            if debug_print and last_text:
                print("LLM RAW OUTPUT (truncated):\n", last_text[:800])
            if attempt == max_retries:
                raise
            time.sleep(0.7 * attempt)


# ---------- Orquestación ----------

def build_and_store_emotions_dictionary_with_llm(
        wide_df,
        llm_model: str = "gpt-4o-mini",
        temperature: float = 0.0,
        dry_run_llm: bool = False,
        verbose: bool = False,
) -> List[Dict[str, Any]]:
    """
    1) Toma columnas de wide_df (excluye 'track_spotify_id') => emociones.
    2) Llama LLM para clasificar y asignar emoji.
    3) Crea/TRUNCATE emotions_dictionary y guarda.
    4) Devuelve la lista final insertada.
    """
    # 1) Extraer nombres de emociones desde los headers del DF
    all_cols = list(wide_df.columns)
    emotions_in_df = [c for c in all_cols if c.lower() != "track_spotify_id"]
    if verbose:
        print("Emotions from wide_df:", emotions_in_df)

    if not emotions_in_df:
        raise ValueError("No emotion columns found in wide_df (other than 'track_spotify_id').")

    # 2) LLM -> mapping
    mapping = llm_classify_emotions_with_openai(
        emotions=emotions_in_df,
        model=llm_model,
        temperature=temperature,
        dry_run=dry_run_llm,
        debug_print=verbose,
    )

    # 3) Ajustar a headers exactos (por si el LLM cambia mayúsculas/minúsculas)
    by_lower_original = {c.lower(): c for c in emotions_in_df}
    cleaned: List[Dict[str, Any]] = []
    for item in mapping:
        key = item["emotion"].strip().lower()
        if key in by_lower_original:
            cleaned.append({
                "emotion": by_lower_original[key],
                "normalize": item["normalize"],
                "emoji": item["emoji"]
            })

    if not cleaned:
        raise ValueError("LLM produced no valid emotions present in wide_df.")

    # 4) Persistir en DB
    db.create_and_truncate_emotions_dictionary()
    db.insert_emotions_dictionary(cleaned)

    return cleaned


In [23]:
result = build_and_store_emotions_dictionary_with_llm(
    wide_df,
    llm_model="gpt-4o-mini",
    temperature=0.2,
    dry_run_llm=False,  # True para probar sin consumir API
    verbose=True  # imprime las columnas/RAW si hay errores
)

Emotions from wide_df: ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'neutral', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise']


In [24]:
print(f"Inserted {len(result)} emotions into emotions_dictionary")
result

Inserted 28 emotions into emotions_dictionary


[{'emotion': 'admiration', 'normalize': 'FP', 'emoji': '😍'},
 {'emotion': 'amusement', 'normalize': 'FP', 'emoji': '😂'},
 {'emotion': 'anger', 'normalize': 'FN', 'emoji': '😠'},
 {'emotion': 'annoyance', 'normalize': 'MN', 'emoji': '😒'},
 {'emotion': 'approval', 'normalize': 'FP', 'emoji': '👍'},
 {'emotion': 'caring', 'normalize': 'FP', 'emoji': '❤️'},
 {'emotion': 'confusion', 'normalize': 'N', 'emoji': '😕'},
 {'emotion': 'curiosity', 'normalize': 'MP', 'emoji': '🤔'},
 {'emotion': 'desire', 'normalize': 'FP', 'emoji': '😍'},
 {'emotion': 'disappointment', 'normalize': 'MN', 'emoji': '😞'},
 {'emotion': 'disapproval', 'normalize': 'MN', 'emoji': '👎'},
 {'emotion': 'disgust', 'normalize': 'FN', 'emoji': '🤢'},
 {'emotion': 'embarrassment', 'normalize': 'MN', 'emoji': '😳'},
 {'emotion': 'excitement', 'normalize': 'FP', 'emoji': '🎉'},
 {'emotion': 'fear', 'normalize': 'FN', 'emoji': '😨'},
 {'emotion': 'gratitude', 'normalize': 'FP', 'emoji': '🙏'},
 {'emotion': 'grief', 'normalize': 'FN', 'emo

# CLustering

We will now apply data clustering to generate groups that will serve as playlists, built around key aspects of related songs

In [25]:
import json, ast
import pandas as pd


def ensure_list_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """
    Ensures column `col` is a Python list[str].
    Handles:
      - Python repr lists with single quotes: "['a', 'b']"
      - JSON lists: ["a","b"]
      - Comma-separated fallback: a, b
    """

    def _parse(x):
        if isinstance(x, list):
            return [str(t).strip() for t in x if str(t).strip()]
        if x is None or (isinstance(x, float) and pd.isna(x)):
            return []
        if isinstance(x, str):
            s = x.strip()
            if not s:
                return []
            # 1) Try Python literal (handles single quotes)
            if s.startswith("[") and s.endswith("]"):
                try:
                    v = ast.literal_eval(s)
                    if isinstance(v, list):
                        return [str(t).strip() for t in v if str(t).strip()]
                except Exception:
                    pass
                # 2) Try JSON (double quotes)
                try:
                    v = json.loads(s)
                    if isinstance(v, list):
                        return [str(t).strip() for t in v if str(t).strip()]
                except Exception:
                    pass
                # 3) Fallback: split inner by comma
                inner = s[1:-1]
                parts = [p.strip().strip("'").strip('"') for p in inner.split(",")]
                return [p for p in parts if p]
            # 4) Plain comma-separated
            return [t.strip() for t in s.split(",") if t.strip()]
        # last resort
        return [str(x).strip()] if str(x).strip() else []

    out = df.copy()
    out[col] = out[col].apply(_parse)
    return out

We check that the normalized_keywords column is of type list.

In [26]:
# Convert the column from string to list[str]
df = ensure_list_column(df, col="normalized_keywords")

# Sanity check
print(type(df.loc[df.index[0], "normalized_keywords"]), df.loc[df.index[0], "normalized_keywords"][:5])

<class 'list'> ['modern', 'party', 'romance', 'flirty', 'interaction']


This script implements an end-to-end pipeline to group songs into **thematic playlists** using **semantic keyword embeddings** and clustering with **KMeans**.

1. Build embeddings per track
- Each song has a set of *normalized keywords*.
- Using a *Sentence Transformers* model (`all-MiniLM-L6-v2`), vector embeddings are generated for those keywords.
- A *mean pooling* operation (averaging) is applied to obtain a single dense vector representing the entire track.

2. Clustering with KMeans
- The embeddings of all tracks are clustered into *k* groups using KMeans.
- The **Silhouette Score** is computed to quickly evaluate clustering quality.

3. Attach clusters back to the DataFrame
- A new column `cluster_emb` is added to the original DataFrame, indicating the cluster assignment for each track.

4. Cluster interpretability
- The most frequent keywords per cluster are extracted, producing a top-N list that summarizes the common themes of each group.
- Representative “Artist — Title” examples are sampled for each cluster, making it easier to inspect them manually.

In [27]:
import numpy as np
import pandas as pd
from typing import List
from collections import Counter, defaultdict
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


# ---------------------------
# 1) Build embeddings per track
# ---------------------------
def compute_track_embeddings(
        df: pd.DataFrame,
        keywords_col: str = "normalized_keywords",
        model_name: str = "all-MiniLM-L6-v2",
        normalize: bool = True,
        max_keywords: int | None = None
) -> tuple[np.ndarray, List[int], SentenceTransformer]:
    """
    Computes a dense embedding per track by averaging the embeddings of its normalized keywords.
    Returns:
      - X: np.ndarray of shape (n_tracks, dim)
      - valid_idx: indices of rows that have at least 1 keyword (used to subset df later)
      - model: the SentenceTransformer model used
    """
    model = SentenceTransformer(model_name)
    vectors = []
    valid_idx = []

    for i, kws in enumerate(df[keywords_col].tolist()):
        if not isinstance(kws, list) or len(kws) == 0:
            continue
        toks = [str(k).strip() for k in kws if isinstance(k, str) and str(k).strip()]
        if not toks:
            continue
        if max_keywords is not None:
            toks = toks[:max_keywords]

        kw_emb = model.encode(toks, normalize_embeddings=normalize)
        if isinstance(kw_emb, list):
            kw_emb = np.asarray(kw_emb)
        track_vec = kw_emb.mean(axis=0)  # mean pooling over keywords
        vectors.append(track_vec)
        valid_idx.append(i)

    if not vectors:
        raise ValueError("No tracks produced embeddings. Check 'normalized_keywords' column.")
    X = np.vstack(vectors)
    return X, valid_idx, model


# ---------------------------
# 2) KMeans clustering
# ---------------------------
def cluster_tracks_embeddings(
        X: np.ndarray,
        k: int = 8,
        random_state: int = 42
) -> tuple[np.ndarray, KMeans, float]:
    """
    Clusters the embedding matrix X with KMeans.
    Returns:
      - labels: cluster assignment per row in X
      - kmeans: fitted model
      - sil: silhouette score (for quick quality check)
    """
    km = KMeans(n_clusters=k, random_state=random_state, n_init="auto")
    labels = km.fit_predict(X)
    sil = silhouette_score(X, labels) if len(set(labels)) > 1 else np.nan
    return labels, km, sil


# ---------------------------
# 3) Attach clusters back to df
# ---------------------------
def attach_clusters_to_df(
        df: pd.DataFrame,
        labels: np.ndarray,
        valid_idx: List[int],
        cluster_col: str = "cluster_emb"
) -> pd.DataFrame:
    """
    Adds the cluster labels to a copy of df at the indices used for embeddings.
    Rows without embeddings remain with NaN clusters.
    """
    out = df.copy()
    out[cluster_col] = np.nan
    out.loc[out.index[valid_idx], cluster_col] = labels
    if out[cluster_col].isna().any():
        out[cluster_col] = out[cluster_col].astype("Int64")
    return out


# ---------------------------
# 4) Cluster summaries (human-readable)
# ---------------------------
def top_keywords_per_cluster(
        df_with_clusters: pd.DataFrame,
        cluster_col: str = "cluster_emb",
        keywords_col: str = "normalized_keywords",
        top_n: int = 12
) -> dict[int, List[tuple[str, int]]]:
    """
    For interpretability: returns top-N most frequent normalized keywords per cluster.
    """
    summary = {}
    for c in sorted(df_with_clusters[cluster_col].dropna().unique()):
        sub = df_with_clusters[df_with_clusters[cluster_col] == c]
        cnt = Counter()
        for kws in sub[keywords_col]:
            if isinstance(kws, list):
                cnt.update([k for k in kws if isinstance(k, str) and k.strip()])
        summary[int(c)] = cnt.most_common(top_n)
    return summary


def sample_titles_per_cluster(
        df_with_clusters: pd.DataFrame,
        cluster_col: str = "cluster_emb",
        n_samples: int = 5
) -> dict[int, List[str]]:
    """
    Returns up to n_samples "Artist — Title" examples per cluster.
    """
    examples = {}
    for c in sorted(df_with_clusters[cluster_col].dropna().unique()):
        sub = df_with_clusters[df_with_clusters[cluster_col] == c].head(n_samples)
        examples[int(c)] = [
            f"{row['artist_name']} — {row['title']}"
            for _, row in sub.iterrows()
        ]
    return examples


# ---------------------------
# 5) End-to-end usage
# ---------------------------
# df is your DataFrame with columns:
# - track_spotify_id
# - artist_name
# - title
# - normalized_keywords (list[str])

# Build embeddings
X, valid_idx, st_model = compute_track_embeddings(
    df, keywords_col="normalized_keywords", model_name="all-MiniLM-L6-v2", normalize=True
)

# Cluster (choose k based on your dataset size; start with 6–12)
labels, kmeans, sil = cluster_tracks_embeddings(X, k=15, random_state=42)
print("Silhouette:", sil)

# Attach cluster labels back to df
df_emb = attach_clusters_to_df(df, labels, valid_idx, cluster_col="cluster_emb")

# Inspect
df_emb[["track_spotify_id", "artist_name", "title", "cluster_emb"]].head(10)

# Top keywords per cluster (for interpretation)
cluster_keywords = top_keywords_per_cluster(df_emb, cluster_col="cluster_emb", keywords_col="normalized_keywords",
                                            top_n=20)
for cid, tops in cluster_keywords.items():
    print(f"\nCluster {cid} top keywords:")
    print(", ".join([f"{w}({c})" for w, c in tops]))

# Optional: sample titles per cluster
examples = sample_titles_per_cluster(df_emb, cluster_col="cluster_emb", n_samples=10)
for cid, ex in examples.items():
    print(f"\nCluster {cid} examples:")
    for s in ex:
        print(" -", s)


Silhouette: 0.08390728384256363

Cluster 0 top keywords:
passion(7), defiance(7), identity(6), protest(6), emotion(6), culture(6), city(6), rebellion(6), energy(6), empowerment(6), rap(5), freedom(5), equality(5), bold(5), resistance(5), rhythm(5), struggle(5), conflict(5), resilience(5), critique(5)

Cluster 1 top keywords:
breakup(43), emotion(40), relationship(38), mood(35), conflict(35), vulnerable(34), drama(32), passion(31), pop(28), reflection(23), betrayal(23), desire(22), introspection(22), intense(20), regret(20), vocals(19), hurt(18), love(18), melody(18), expressive(17)

Cluster 2 top keywords:
romance(39), joy(37), energy(37), dance(37), pop(36), summer(33), fun(30), rhythm(30), love(29), melody(27), vibrant(27), celebration(26), upbeat(25), mood(24), cold(22), youthful(22), lighthearted(20), passion(19), catchy(19), playful(19)

Cluster 3 top keywords:
energy(42), confident(42), empowerment(37), bold(35), pop(34), dance(32), rhythm(28), freedom(26), attitude(26), catchy(2

Here we’ll do something interesting: we will send our clusters with their top keywords to the AI so that it can return a name and an engaging description, which we will use to build our playlists.

In [28]:
import os
import pandas as pd
from openai import OpenAI
import json, re

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def build_cluster_payload(df_with_clusters: pd.DataFrame,
                          cluster_col: str = "cluster_emb",
                          keywords_col: str = "normalized_keywords",
                          title_cols=("artist_name", "title"),
                          top_k_keywords: int = 15,
                          max_examples: int = 5) -> dict:
    """
    Prepares a payload per cluster:
      {cluster_id: {"keywords": [...], "examples": ["Artist — Title", ...]}}
    """
    payload = {}
    for cid in sorted(df_with_clusters[cluster_col].dropna().unique()):
        sub = df_with_clusters[df_with_clusters[cluster_col] == cid]
        # top keywords by simple frequency inside the cluster
        kw_series = sub[keywords_col].explode()
        kw_series = kw_series[kw_series.notna()].astype(str).str.strip().str.lower()
        top = (kw_series.value_counts().head(top_k_keywords).index.tolist()
               if not kw_series.empty else [])
        payload[int(cid)] = {"keywords": top}
    return payload



def parse_json_maybe(s: str) -> dict:
    # Normalize input
    if s is None:
        return {}
    s = s.strip().lstrip("\ufeff")

    # 1) Strip code fences if the whole thing is fence-wrapped
    s_nofence = re.sub(r"^\s*```(?:json|js|javascript)?\s*|\s*```\s*$", "", s, flags=re.I|re.S)
    try:
        return json.loads(s_nofence)
    except Exception:
        pass

    # 2) Extract JSON from a fenced block if present
    m = re.search(r"```(?:json|js|javascript)?\s*(\{.*?\})\s*```", s, flags=re.I|re.S)
    if m:
        try:
            return json.loads(m.group(1))
        except Exception:
            pass

    # 3) Raw-decode from first '{' that yields valid JSON
    dec = json.JSONDecoder()
    for i, ch in enumerate(s):
        if ch == "{":
            try:
                obj, end = dec.raw_decode(s[i:])
                return obj
            except Exception:
                continue

    # 4) Very last resort: grab a curly block and try fixing trailing commas
    m2 = re.search(r"\{.*\}", s, flags=re.S)
    if m2:
        frag = m2.group(0)
        # remove trailing commas before } or ]
        frag = re.sub(r",\s*([}\]])", r"\1", frag)
        try:
            return json.loads(frag)
        except Exception:
            pass

    return {}

def name_clusters_with_llm(cluster_payload: dict,
                           model: str = "gpt-4.1-mini",
                           temperature: float = 0.2) -> dict:
    """
    Sends cluster summaries to the LLM and returns:
      {cluster_id: {"name": str, "description": str}}
    Robust JSON parsing with fallback.
    """
    # Build prompt
    items = []
    for cid, data in cluster_payload.items():
        kws = ", ".join(data["keywords"])
        items.append(
            f"Cluster {cid}:\n"
            f"- Top keywords: {kws or '(none)'}\n"
        )
    clusters_block = "\n\n".join(items)

    prompt = (
        "You are an expert music curator. Name each cluster of songs concisely.\n"
        "Rules:\n"
        "- Provide a short, human-friendly name (≤ 4 words) and a 1–2 sentence description.\n"
        "- Avoid artist names and proper nouns; focus on themes/moods.\n"
        "- Prefer genre/vibe/emotion terms (e.g., 'Dancefloor Energy', 'Melancholic Love').\n"
        "- Keep names in Title Case, English only.\n"
        "Return ONLY a JSON object mapping cluster id to an object with fields 'name' and 'description'.\n\n"
        "Example output format:\n"
        "{\n"
        "  \"0\": {\"name\": \"Late-Night Romance\", \"description\": \"Sensual, intimate themes with slow dance vibes.\"},\n"
        "  \"1\": {\"name\": \"Party Anthems\", \"description\": \"High-energy club tracks centered on dancing and celebration.\"}\n"
        "}\n\n"
        "Clusters to label:\n"
        f"{clusters_block}\n\n"
        "Now return ONLY the JSON."
    )

    resp = client.responses.create(model=model, input=prompt, temperature=temperature)
    text = resp.output_text.strip()

    data = parse_json_maybe(text)
    # Normalize keys to ints if possible
    out = {}
    for k, v in data.items():
        try:
            cid = int(k)
        except Exception:
            cid = k
        name = (v or {}).get("name", "").strip()
        desc = (v or {}).get("description", "").strip()
        if name:
            out[cid] = {"name": name, "description": desc}
    return out


def attach_cluster_names(df_with_clusters: pd.DataFrame,
                         labels_map: dict,
                         cluster_col: str = "cluster_emb") -> pd.DataFrame:
    """
    Adds 'cluster_name' and 'cluster_desc' columns using labels_map from LLM.
    """
    out = df_with_clusters.copy()
    out["cluster_name"] = out[cluster_col].map(
        lambda c: labels_map.get(int(c), {}).get("name") if pd.notna(c) else None)
    out["cluster_desc"] = out[cluster_col].map(
        lambda c: labels_map.get(int(c), {}).get("description") if pd.notna(c) else None)
    return out


In [29]:
# 1) Build payload from your clustered DataFrame (assumes 'cluster_emb' and 'normalized_keywords')
payload = build_cluster_payload(df_emb, cluster_col="cluster_emb", keywords_col="normalized_keywords",
                                top_k_keywords=20, max_examples=5)

# 2) Ask the LLM for names/descriptions
labels_map = name_clusters_with_llm(payload, model="gpt-4.1-mini", temperature=0.2)

# 3) Attach names back to your DataFrame
df_named = attach_cluster_names(df_emb, labels_map, cluster_col="cluster_emb")
df_named[["cluster_emb", "cluster_name", "cluster_desc"]].drop_duplicates().sort_values("cluster_emb").head(20)



Unnamed: 0,cluster_emb,cluster_name,cluster_desc
6,0.0,Empowered Rebellion,"Bold, energetic tracks centered on defiance, c..."
31,1.0,Heartbreak Drama,"Emotional pop songs exploring vulnerability, c..."
12,2.0,Joyful Summer Pop,"Upbeat, vibrant dance tracks filled with youth..."
1,3.0,Confident Dance Anthems,"Bold, modern pop with empowering attitudes and..."
14,4.0,Moody Rock Reflection,"Intense, introspective rock with dark atmosphe..."
51,5.0,Sensual Nightlife Energy,"Dynamic, passionate dance tracks with bold bea..."
3,6.0,Nostalgic Acoustic Romance,"Gentle, heartfelt pop with reflective melodies..."
22,7.0,Vibrant Party Vibes,"Energetic, confident dance music perfect for l..."
5,8.0,Sentimental Breakup Ballads,"Vulnerable, introspective pop exploring regret..."
9,9.0,Tender Romantic Ballads,"Expressive, heartfelt songs focused on love, p..."


We update the CSV to streamline the process.

In [32]:
df_named.to_csv("tracks_with_normalized_keywords.csv", index=False, encoding="utf-8")

In this step, we connected our DataFrame (`df_named`) with the database to properly manage clusters and link them to tracks.

In [33]:
import pandas as pd
import libraries.sqlite as db

conn = db.create_connection()
db.sync_clusters_and_update_tracks(conn, df_named)
conn.close()

Inserted clusters: 15 | Updated tracks: 911


# Show Results

## Useful Functions

We’ll start with functions that will help us get what we want. In this case, we’re going to create a function that retrieves the overall sentiment of all the songs along with the specific data, taking the top 5 most representative songs for each emotion.
We’ll also be able to see it by album, and even by individual song if we want. Additionally, we’ll obtain the suggested playlists along with their main emotion.

####
1. Get the overall emotion of the entire Spotify

In [1]:
import libraries.sqlite as db
db.fetch_emotions_dictionary()

[{'emotion': 'admiration', 'normalize': 'FP', 'emoji': '😍'},
 {'emotion': 'amusement', 'normalize': 'FP', 'emoji': '😂'},
 {'emotion': 'anger', 'normalize': 'FN', 'emoji': '😠'},
 {'emotion': 'annoyance', 'normalize': 'MN', 'emoji': '😒'},
 {'emotion': 'approval', 'normalize': 'FP', 'emoji': '👍'},
 {'emotion': 'caring', 'normalize': 'FP', 'emoji': '❤️'},
 {'emotion': 'confusion', 'normalize': 'N', 'emoji': '😕'},
 {'emotion': 'curiosity', 'normalize': 'MP', 'emoji': '🤔'},
 {'emotion': 'desire', 'normalize': 'FP', 'emoji': '😍'},
 {'emotion': 'disappointment', 'normalize': 'MN', 'emoji': '😞'},
 {'emotion': 'disapproval', 'normalize': 'MN', 'emoji': '👎'},
 {'emotion': 'disgust', 'normalize': 'FN', 'emoji': '🤢'},
 {'emotion': 'embarrassment', 'normalize': 'MN', 'emoji': '😳'},
 {'emotion': 'excitement', 'normalize': 'FP', 'emoji': '🎉'},
 {'emotion': 'fear', 'normalize': 'FN', 'emoji': '😨'},
 {'emotion': 'gratitude', 'normalize': 'FP', 'emoji': '🙏'},
 {'emotion': 'grief', 'normalize': 'FN', 'emo

####
2. Fetch clusters

In [2]:
db.get_emotions_from_dictionary()

['admiration',
 'amusement',
 'anger',
 'annoyance',
 'approval',
 'caring',
 'confusion',
 'curiosity',
 'desire',
 'disappointment',
 'disapproval',
 'disgust',
 'embarrassment',
 'excitement',
 'fear',
 'gratitude',
 'grief',
 'joy',
 'love',
 'nervousness',
 'neutral',
 'optimism',
 'pride',
 'realization',
 'relief',
 'remorse',
 'sadness',
 'surprise']

####
3. Fetch info

In [None]:
db.fetch_cluster_avg_emotions(1)
db.fetch_all_artists()
db.fetch_all_albums()

In [None]:
import libraries.sqlite as db
conn=db.create_connection()
db.fetch_tracks_with_buckets_paginated(conn)

## FastAPI Service

We are going to set up a FastAPI service to provide the endpoints that will power a frontend, so you can visualize the lists created by the app and more information about your emotions. For this, I’ve built a small interface with Vue.

In [1]:
# server.py
import os
import time
import threading
from typing import Optional

import uvicorn
from fastapi import FastAPI, APIRouter, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from starlette.routing import Mount

import libraries.sqlite as db
from pydantic import BaseModel
from libraries.spotify import get_spotify_auth, get_spotify_client, upsert_playlist_by_name

# Spotify config (env-driven; must match your app's settings)
SPOTIPY_CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID", "ec6c43ef03034d7393a4906cd1df5f06")
SPOTIPY_REDIRECT_URI = os.getenv("SPOTIPY_REDIRECT_URI", "https://spotify-auth.viant.dev/callback")
SPOTIPY_CACHE_PATH = os.getenv("SPOTIPY_CACHE_PATH", ".cache")
# For playlist ops we need these scopes; add others as required
SPOTIPY_SCOPE = os.getenv("SPOTIPY_SCOPE", "user-library-read playlist-modify-public playlist-modify-private")



# =========================
# FastAPI app
# =========================
app = FastAPI()

# CORS (relax as needed)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# =========================
# API router under /api
# =========================
api = APIRouter(prefix="/api")

@api.get("/health")
def health():
    return {"status": "ok"}

@api.get("/playlist-emotions")
def getNormalizeEmotions():
    avgEmotions = db.group_avg_emotions_by_normalize(db.fetch_avg_per_emotion())
    return {"emotions": avgEmotions}

@api.get("/playlist-emotions/all")
def getAllEmotions():
    avgEmotions = db.fetch_avg_per_emotion()
    return {"emotions": avgEmotions, "dictionary": db.fetch_emotions_dictionary()}

@api.get("/dictionary/emotions")
def get_emotions_dic():
    return db.fetch_emotions_dictionary()

@app.get("/callback")
def spotify_callback(code: str | None = None, error: str | None = None):
    if error:
        raise HTTPException(status_code=400, detail=f"Spotify error: {error}")
    if not code:
        raise HTTPException(status_code=400, detail="Missing authorization code")

    auth = get_spotify_auth(
        client_id=SPOTIPY_CLIENT_ID,
        redirect_uri=SPOTIPY_REDIRECT_URI,
        scope=SPOTIPY_SCOPE,
        cache_path=SPOTIPY_CACHE_PATH,
        open_browser=True,
    )
    # Intercambia el code y guarda el token en .cache
    auth.get_access_token(code, check_cache=True)
    return {"status": "ok", "message": "Auth OK. You can close this tab."}

@api.get("/emotion-tracks")
def list_tracks(
    page: int = Query(1, ge=1),
    page_size: int = Query(10, ge=1, le=200),
    track: str | None = Query(None, description="Filter by track name (case-insensitive)"),
    artist: str | None = Query(None, description="Filter by artist name (case-insensitive)"),
    album: str | None = Query(None, description="Filter by album name (case-insensitive)"),
    sort_by: str = Query("track", description="Sort key"),
    sort_dir: str = Query("asc", description="asc or desc"),
):
    sort_by = sort_by if sort_by in db.ALLOWED_SORT_KEYS else "track"
    sort_dir = "desc" if sort_dir.lower() == "desc" else "asc"

    try:
        conn = db.create_connection()
        items, total = db.fetch_tracks_with_buckets_paginated(
            conn=conn,
            page=page,
            page_size=page_size,
            track=track,
            artist=artist,
            album=album,
            sort_by=sort_by,
            sort_dir=sort_dir,
        )
    except Exception as e:
        parts = [str(a) for a in getattr(e, "args", []) if a]
        detail = " ".join(parts) if parts else str(e)
        raise HTTPException(status_code=500, detail=f"Database error: {detail}".strip())
    finally:
        try:
            conn.close()
        except:
            pass

    return {
        "page": page,
        "page_size": page_size,
        "total": total,
        "sort_by": sort_by,
        "sort_dir": sort_dir,
        "items": items,
    }

@api.get("/playlist-cluster")
def get_playlist_cluster():
    return db.fetch_all_tracks_with_clusters()

@api.get("/playlist-cluster/{cluster_id}")
def get_playlist_cluster(
        cluster_id: int,
):
    return db.fetch_tracks_by_cluster(cluster_id)

@api.get("/clusters")
def get_playlist_cluster_all():
    return db.fetch_clusters()

@app.get("/spotify/callback")
def spotify_callback(code: str | None = None, error: str | None = None):
    """
    PKCE callback: exchange 'code' for tokens and persist them in the cache.
    """
    if error:
        raise HTTPException(status_code=400, detail=f"Spotify error: {error}")
    if not code:
        raise HTTPException(status_code=400, detail="Missing authorization code")

    auth = get_spotify_auth(
        client_id=SPOTIPY_CLIENT_ID,
        redirect_uri=SPOTIPY_REDIRECT_URI,
        scope=SPOTIPY_SCOPE,
        cache_path=SPOTIPY_CACHE_PATH,
        open_browser=True,
    )
    # Store tokens in cache
    auth.get_access_token(code, check_cache=True)
    return {"status": "ok", "message": "Auth OK. You can close this tab."}

class ClusterPlaylistBody(BaseModel):
    description: str = "Generated from cluster"
    public: bool = False
    replace: bool = True  # replace items instead of append

@api.post("/clusters/{cluster_id}/playlist")
def create_or_update_cluster_playlist(cluster_id: int, body: ClusterPlaylistBody):
    """
    Build a Spotify client from cached PKCE tokens, fetch cluster tracks from SQLite,
    and upsert a playlist named after the cluster.
    """
    # 1) Create Spotipy client with playlist scopes using your existing helper
    sp = get_spotify_client(
        client_id=SPOTIPY_CLIENT_ID,
        redirect_uri=SPOTIPY_REDIRECT_URI,
        scope=SPOTIPY_SCOPE,
        cache_path=SPOTIPY_CACHE_PATH,
        open_browser=False,
    )

    # 2) Pull tracks for cluster
    rows = db.fetch_tracks_by_cluster(cluster_id)
    if not rows:
        raise HTTPException(status_code=404, detail=f"No tracks found for cluster_id={cluster_id}")

    cluster_name = rows[0].get("cluster_name") or f"Cluster {cluster_id}"
    track_ids = [r["track_spotify_id"] for r in rows if r.get("track_spotify_id")]

    print(f"Tracks found: {len(track_ids)}")
    print(track_ids)

    if not track_ids:
        raise HTTPException(status_code=404, detail=f"No track_spotify_id values for cluster_id={cluster_id}")

    # 3) Create/update playlist by name
    pid, url = upsert_playlist_by_name(
        sp=sp,
        name=cluster_name,
        description=body.description,
        track_ids_or_urls=track_ids,
        public=body.public,
        replace=body.replace,
    )

    # Comprueba propiedad del playlist (debe ser tu usuario actual)
    owner = sp.playlist(pid, fields="owner.id").get("owner", {}).get("id")
    current = sp.current_user().get("id")
    if owner != current:
        raise HTTPException(status_code=403, detail=f"Playlist owner is {owner}, not {current}. You cannot modify it.")

    # Lee total de items tras la operación
    total_after = sp.playlist_items(pid, fields="total").get("total")
    print(f"Total tracks: {total_after}")

    snapshots = []  # captura lo que retorne upsert internamente si lo propagas
    pl_info = sp.playlist_items(pid, fields="total")
    return {
        "cluster_id": cluster_id,
        "cluster_name": cluster_name,
        "requested_tracks": len(track_ids),
        "playlist_id": pid,
        "playlist_url": url,
        "playlist_items_after": pl_info.get("total"),
        "snapshots": snapshots,  # útil para confirmar mutaciones
    }


app.include_router(api)


# =========================
# SPA static mounting with history fallback
# =========================
class SPAStaticFiles(StaticFiles):
    """
    Static files with history-fallback: if path not found, serve index.html.
    This enables Vue Router (history mode) deep links.
    """
    async def get_response(self, path, scope):
        response = await super().get_response(path, scope)
        if response.status_code == 404:
            return await super().get_response("index.html", scope)
        return response


def initialize_frontend(
    frontend_dir: Optional[str] = None,
    mount_path: str = "/",
    env_var: str = "FRONTEND_DIR",
    default_rel: str = "../frontend/dist",
) -> str:
    """
    Mount a built Vue app (SPA) at `mount_path`, with history fallback.
      - If `frontend_dir` is None, uses $FRONTEND_DIR or `default_rel`.
      - Idempotent: skips if already mounted at `mount_path`.
      - Returns absolute path used.
    """
    base = frontend_dir or os.getenv(env_var, default_rel)
    abs_dir = os.path.abspath(base)

    # Avoid duplicate mounts
    for r in app.routes:
        if isinstance(r, Mount) and r.path == mount_path:
            print(f"[init] SPA already mounted at {mount_path} → {abs_dir}")
            return abs_dir

    index_path = os.path.join(abs_dir, "index.html")
    if not os.path.exists(index_path):
        print(f"[warn] {abs_dir} does not contain index.html. Did you run `npm run build`?")

    app.mount(mount_path, SPAStaticFiles(directory=abs_dir, html=True), name="spa")
    print(f"[init] SPA mounted at {mount_path} from {abs_dir}  (API under /api)")
    return abs_dir


# =========================
# Server controls
# =========================
SERVER_STATE = {"server": None, "thread": None}

def start_server(host="127.0.0.1", port=8080):
    """
    Start Uvicorn in a background thread.
    """
    if SERVER_STATE["server"] is not None:
        print(f"⚠️ Server already running at http://{host}:{port}")
        return

    config = uvicorn.Config(app, host=host, port=port, log_level="info")
    server = uvicorn.Server(config)

    thread = threading.Thread(target=server.run, daemon=True)
    thread.start()

    time.sleep(0.5)
    SERVER_STATE.update({"server": server, "thread": thread})
    print(f"🚀 Server running at http://{host}:{port}  (API under /api)")

def stop_server():
    """
    Stop the background Uvicorn server.
    """
    server = SERVER_STATE.get("server")
    thread = SERVER_STATE.get("thread")
    if server is None:
        print("ℹ️ No server is currently running.")
        return
    server.should_exit = True
    if thread and thread.is_alive():
        thread.join(timeout=3)
    SERVER_STATE.update({"server": None, "thread": None})
    print("🛑 Server stopped.")

def main(frontend_dir:str = None):
    # Mount frontend (uses FRONTEND_DIR env var or ../frontend/dist)
    initialize_frontend(frontend_dir=frontend_dir, mount_path="/")
    # Start server
    start_server(host=os.getenv("HOST", "127.0.0.1"), port=int(os.getenv("PORT", "8080")))

In [2]:
main("playlist-emotions/dist")

INFO:     Started server process [78413]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8080 (Press CTRL+C to quit)


[init] SPA mounted at / from /Users/vchiriguaya/ProjectOffline/spotify-emotions/playlist-emotions/dist  (API under /api)
🚀 Server running at http://127.0.0.1:8080  (API under /api)


## Stop Server

When you finish, run this cell to shut down the server.

In [None]:
stop_server()