In [None]:
!pip install dash

Collecting dash
  Downloading dash-3.0.2-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-3.0.2-py3-none-any.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading werkzeug-3.0.6-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m228.0/228.0 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: Werkzeug, retryi

In [None]:
import dash
from dash import dcc, html, Input, Output
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load and preprocess data

# Features for clustering. Defined once, before any use, so the missing-value
# filter and the scaler always operate on exactly the same columns.
features = ['danceability', 'energy', 'loudness', 'speechiness', 'valence', 'tempo']

df = pd.read_csv('hottest_50_1960_2022_encoding.csv')
# Exclude rows missing any clustering feature. 'date' is included as well
# because the year is parsed from it below and a missing date would make the
# integer conversion fail.
df = df.dropna(subset=features + ['date'])
# NOTE(review): assumes 'date' is a string starting with a four-digit year — confirm.
df['year'] = df['date'].str[:4].astype(int)
df['decade'] = (df['year'] // 10) * 10  # Group into decades

# Standardise the features; the fitted scaler is reused by later cells.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[features])

# Determine optimal clusters (Elbow Method): record the within-cluster sum of
# squares (KMeans inertia) for k = 1..10 so the curve can be inspected.
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, random_state=42)
    kmeans.fit(df_scaled)
    wcss.append(kmeans.inertia_)
optimal_clusters = 4  # Adjust based on elbow curve

In [None]:
# Create the Dash application and declare its page layout:
# a title, a decade picker, two decade-driven graphs, then a feature picker
# and the feature-evolution graph.
app = dash.Dash(__name__)

app.layout = html.Div(children=[
    html.H1(
        "Dynamic Genre Evolution (1960-2020)",
        style=dict(textAlign='center'),
    ),

    # Decade picker: one option per decade present in the data, defaulting
    # to the earliest one.
    html.Div(
        children=[
            dcc.Dropdown(
                id='decade-selector',
                options=[
                    dict(label=f'{decade_start}s', value=decade_start)
                    for decade_start in sorted(df['decade'].unique())
                ],
                value=df['decade'].min(),
                placeholder="Select Decade",
                style=dict(width='100%'),
            ),
        ],
        style=dict(width='50%', margin='20px auto'),
    ),

    # Graphs filled in by the callback for the selected decade.
    dcc.Graph(id='cluster-plot'),
    dcc.Graph(id='heatmap'),

    html.H3(
        "Select Feature for Evolution Comparison:",
        style=dict(textAlign='center'),
    ),

    # Feature picker: one option per clustering feature.
    html.Div(
        children=[
            dcc.Dropdown(
                id='feature-selector',
                options=[
                    dict(label=feature_name.title(), value=feature_name)
                    for feature_name in features
                ],
                value='danceability',
                placeholder="Select Feature",
                style=dict(width='100%'),
            ),
        ],
        style=dict(width='50%', margin='20px auto'),
    ),

    dcc.Graph(id='feature-evolution'),
])


In [None]:
# Callbacks for interactivity
def _tsne_iteration_kwargs(iterations):
    """Return the iteration-count keyword accepted by the installed TSNE.

    scikit-learn 1.5 renamed TSNE's `n_iter` to `max_iter` and 1.7 removed the
    old name, so the keyword is chosen from the constructor signature.
    """
    import inspect

    if 'max_iter' in inspect.signature(TSNE.__init__).parameters:
        return {'max_iter': iterations}
    return {'n_iter': iterations}


def _message_figure(message):
    """Return an empty figure whose title carries `message`."""
    fig = go.Figure()
    fig.update_layout(title=message)
    return fig


def _decade_figures(selected_decade):
    """Build the t-SNE cluster scatter and the cluster-vs-genre heatmap for one decade."""
    # Filter data by decade
    df_decade = df[df['decade'] == selected_decade].copy()
    n_samples = len(df_decade)
    if n_samples < 2:
        # Neither KMeans nor t-SNE can run on fewer than two rows.
        placeholder = _message_figure(
            f"Not enough songs in the {selected_decade}s to cluster"
        )
        return placeholder, placeholder

    scaled_data = scaler.transform(df_decade[features])

    # K-Means Clustering (never ask for more clusters than there are rows)
    n_clusters = min(optimal_clusters, n_samples)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    df_decade['cluster'] = kmeans.fit_predict(scaled_data)

    # t-SNE Projection. Perplexity must be strictly smaller than the number
    # of samples, so it is capped for small decades.
    tsne = TSNE(
        n_components=2,
        random_state=42,
        perplexity=min(30, n_samples - 1),
        **_tsne_iteration_kwargs(500),
    )
    tsne_results = tsne.fit_transform(scaled_data)
    df_decade['tsne_x'] = tsne_results[:, 0]
    df_decade['tsne_y'] = tsne_results[:, 1]

    # Cluster vs Genre Heatmap
    heatmap_data = pd.crosstab(df_decade['cluster'], df_decade['genre_encoding'])
    heatmap_fig = px.imshow(
        heatmap_data,
        labels=dict(x="Genre", y="Cluster", color="Count"),
        title=f"Cluster vs Genre (Decade: {selected_decade}s)",
    )

    # Cluster Plot. Cluster ids are labels, not magnitudes, so they are
    # converted to strings to get a discrete legend instead of a colour bar.
    plot_df = df_decade.assign(cluster=df_decade['cluster'].astype(str))
    cluster_fig = px.scatter(
        plot_df, x='tsne_x', y='tsne_y', color='cluster',
        category_orders={'cluster': [str(k) for k in range(n_clusters)]},
        hover_data=['title', 'artist', 'genre_encoding'],
        title=f"Clusters in the {selected_decade}s (t-SNE Projection)",
    )
    return cluster_fig, heatmap_fig


def _feature_evolution_figure(selected_feature):
    """Build box plots of `selected_feature` per genre for the 2000s and the 2020s."""
    # Feature Evolution (2000s vs 2020s)
    feature_label = selected_feature.title()
    fig = go.Figure()
    for decade_start in (2000, 2020):
        df_slice = df[df['decade'] == decade_start]
        fig.add_trace(go.Box(
            x=df_slice['genre_encoding'],
            y=df_slice[selected_feature],
            name=f'{decade_start}s {feature_label}',
        ))
    fig.update_layout(
        title=f"Feature Evolution: {feature_label} (2000s vs 2020s)",
        xaxis_title="Genre Encoding",
        yaxis_title=feature_label,
    )
    return fig


@app.callback(
    [Output('cluster-plot', 'figure'),
     Output('heatmap', 'figure'),
     Output('feature-evolution', 'figure')],
    [Input('decade-selector', 'value'),
     Input('feature-selector', 'value')]
)
def update_plots(selected_decade, selected_feature):
    """Refresh the three graphs from the two dropdown values.

    Parameters:
        selected_decade: value of the decade dropdown, or None when cleared.
        selected_feature: value of the feature dropdown, or None when cleared.

    Returns:
        (cluster_fig, heatmap_fig, feature_evolution_fig). A graph whose
        dropdown has been cleared is left unchanged via `dash.no_update`
        instead of raising inside the callback.
    """
    if selected_decade is None:
        cluster_fig = heatmap_fig = dash.no_update
    else:
        cluster_fig, heatmap_fig = _decade_figures(selected_decade)

    if selected_feature is None:
        feature_evolution_fig = dash.no_update
    else:
        feature_evolution_fig = _feature_evolution_figure(selected_feature)

    return cluster_fig, heatmap_fig, feature_evolution_fig


In [None]:
# Run the app in Colab.
# The notebook display mode is selected with `jupyter_mode`; `mode` is not a
# parameter of Dash.run and would be forwarded to the Flask server options.
app.run(jupyter_mode='inline', port=8050)

<IPython.core.display.Javascript object>