In [None]:
!pip install dash



In [None]:
import dash
from dash import dcc, html, Output, Input, State, callback_context
import plotly.express as px
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import plotly.graph_objs as go

In [None]:
# Read the chart data; the utf-8-sig codec drops a leading BOM if one is present
df = pd.read_csv('hottest_50_1960_2022_encoding.csv', encoding='utf-8-sig')

# Parse the date column (unparseable entries become NaT) and derive the year
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['year'] = df['date'].dt.year


def _parse_genres(raw):
    """Turn a stringified genre list into a list of cleaned labels.

    The text is split on commas and each piece is trimmed of quotes, square
    brackets and spaces. Non-string cells yield an empty list.
    """
    if not isinstance(raw, str):
        return []
    return [piece.strip("'[] ") for piece in raw.split(',')]


# Convert genres from string to list
df['genres'] = df['genres'].apply(_parse_genres)

In [None]:
# Canonical genre -> sub-genre labels that are folded into it
genre_mapping = {
    'country': ['country', 'nashville sound', 'cowboy western', 'arkansas country'],
    'pop': ['pop', 'bubblegum pop', 'dance pop', 'brill building pop', 'sunshine pop'],
    'r&b': ['r&b', 'pop r&b', 'classic soul', 'motown'],
    'hip hop/rap': ['rap/hip hop', 'hip hop', 'rap', 'trap', 'gangster rap'],
    'rock': ['rock', 'classic rock', 'rock-and-roll', 'folk rock', 'garage rock'],
    'soul': ['soul', 'southern soul', 'memphis soul', 'northern soul']
}

# Inverted lookup: sub-genre label -> canonical genre.
# Insertion order follows genre_mapping and drives the substring fallback below.
reverse_mapping = {
    subgenre: main_genre
    for main_genre, subgenres in genre_mapping.items()
    for subgenre in subgenres
}


def map_genres(genre_list):
    """Collapse raw genre labels into the canonical genres of ``genre_mapping``.

    Each label is lower-cased and resolved in two steps:

    1. An exact lookup in ``reverse_mapping``. This guarantees that a label
       listed explicitly (e.g. ``'pop r&b'`` under ``'r&b'``) is not captured
       by a shorter key that happens to be a substring of it (``'pop'``).
    2. Otherwise, the first sub-genre key (in ``reverse_mapping`` order) that
       occurs as a substring of the label.

    Labels that match nothing are dropped.

    Args:
        genre_list: Iterable of genre label strings.

    Returns:
        Sorted list of the distinct canonical genres found (sorted so the
        result does not depend on set iteration order).
    """
    mapped = set()
    for genre in genre_list:
        genre_lower = genre.lower().strip()
        main = reverse_mapping.get(genre_lower)
        if main is None:
            # Substring fallback, first match wins
            main = next(
                (candidate for sub, candidate in reverse_mapping.items() if sub in genre_lower),
                None,
            )
        if main is not None:
            mapped.add(main)
    return sorted(mapped)

# Attach the canonical genres to every track
df['main_genres'] = df['genres'].apply(map_genres)

# Explode the list of main genres: one row per (track, canonical genre)
df_exploded = df.explode('main_genres')

# Filter to our target genres
target_genres = ["country", "pop", "r&b", "hip hop/rap", "rock", "soul"]
# Take an explicit copy: the boolean-mask selection is otherwise a slice of
# df_exploded, and assigning a column to it triggers SettingWithCopyWarning.
df_filtered = df_exploded[df_exploded['main_genres'].isin(target_genres)].copy()

# Create year column
df_filtered['year'] = pd.to_datetime(df_filtered['date']).dt.year

# Create aggregated data: number of rows per (year, canonical genre)
genre_counts = df_filtered.groupby(['year', 'main_genres']).size().reset_index(name='count')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
  # Radar Chart
  # Audio features plotted on the radar axes
  features = [
      'loudness', 'danceability', 'energy', 'tempo',
      'valence', 'acousticness', 'liveness', 'speechiness',
  ]
  df['decade'] = (df['year'] // 10) * 10

  # Min-max scale tempo and loudness in place so they fall in the 0-1 radial range
  for scaled_col in ('tempo', 'loudness'):
      col_min = df[scaled_col].min()
      col_max = df[scaled_col].max()
      df[scaled_col] = (df[scaled_col] - col_min) / (col_max - col_min)

  # Mean of every feature per decade, computed after the scaling above
  decade_avg = df.groupby('decade')[features].mean().reset_index()

  def radar_chart(decade):
      """Build a closed polar line chart of one decade's mean audio features.

      Reads the matching row of ``decade_avg``, melts it to long form and
      plots ``value`` against ``variable``; each point is labelled with its
      value rounded to two decimals.
      """
      decade_rows = decade_avg[decade_avg['decade'] == decade]
      data = decade_rows.melt(id_vars=['decade'])

      fig = px.line_polar(
          data,
          r='value',
          theta='variable',
          line_close=True,
          title=f'Audio Features in {decade}s',
      )
      fig.update_traces(
          mode="lines+markers+text",
          text=data['value'].round(2),
          textposition="top center",
          textfont_size=8,
      )
      # Fixed 0-1 radial range with the tick labels hidden
      radial_axis = dict(range=[0, 1], showticklabels=False)
      fig.update_layout(polar=dict(radialaxis=radial_axis))
      return fig

  # Create radar charts for 1960, 1980, 2000, 2020
  fig_radar_1960, fig_radar_1980, fig_radar_2000, fig_radar_2020 = (
      radar_chart(decade) for decade in (1960, 1980, 2000, 2020)
  )

In [None]:
# Linear Regression & Moving Average graphs
def apply_regression(df, feature):
    """Compute the yearly mean of ``feature`` with a moving average and a linear trend.

    Args:
        df: DataFrame with a ``year`` column and a numeric ``feature`` column.
        feature: Name of the column to aggregate.

    Returns:
        DataFrame with one row per year and the columns ``year``, ``feature``,
        ``moving_avg`` (5-row rolling mean of the yearly means) and ``trend``
        (least-squares line through the yearly means). ``trend`` is NaN for
        every row when there is no non-NaN yearly mean to fit on, e.g. when
        ``df`` is empty.
    """
    df_feature = df.groupby('year')[feature].mean().reset_index()
    df_feature['moving_avg'] = df_feature[feature].rolling(window=5).mean()
    df_feature['trend'] = np.nan

    # LinearRegression.fit rejects empty input and NaN targets, so fit only on
    # the years that actually have a mean value.
    valid = df_feature[feature].notna()
    if not valid.any():
        return df_feature

    X = df_feature.loc[valid, 'year'].to_numpy().reshape(-1, 1)
    # 1-D target so predict() returns a 1-D array that maps cleanly onto a column
    y = df_feature.loc[valid, feature].to_numpy()
    model = LinearRegression().fit(X, y)

    # Evaluate the fitted line on every year so the trend has no gaps
    all_years = df_feature['year'].to_numpy().reshape(-1, 1)
    df_feature['trend'] = model.predict(all_years)
    return df_feature

In [None]:
# Dash App
app = dash.Dash(__name__)

# Slider bounds as built-in ints. `year` is derived from dates parsed with
# errors='coerce', so any NaT turns the column into float64 and range() would
# reject the raw min/max when the slider marks are built.
min_year = int(df['year'].min())
max_year = int(df['year'].max())

app.layout = html.Div([
    html.H1("Music Genre Popularity (1960–2022)", style={'textAlign': 'center'}),

    # Hidden Store to hold selected genres
    dcc.Store(id='selected-genres-store', data=target_genres),

    # Time Range Slider (a mark every 10 years, starting at the first year)
    html.Div([
        dcc.RangeSlider(
            id='year-range-slider',
            min=min_year,
            max=max_year,
            value=[min_year, max_year],
            marks={str(year): str(year) for year in range(min_year, max_year + 1, 10)},
            step=1,
            allowCross=False,
            tooltip={"placement": "bottom", "always_visible": True},
        )
    ], style={'margin': '40px 60px'}),

    # Multi-Select Dropdown for Genres (all target genres selected by default)
    html.Div([
        dcc.Dropdown(
            id='genre-dropdown',
            options=[{'label': genre.title(), 'value': genre} for genre in target_genres],
            value=target_genres,
            multi=True,
            placeholder="Select genres...",
            style={
                'width': '100%',
                'maxWidth': '1000px',
                'margin': '0 auto',
            }
        )
    ], style={'width': '100%', 'maxWidth': '1000px', 'margin': '20px auto'}),

    # Genre Trend Graph
    dcc.Graph(id='genre-trend'),

    # Radar Charts for decades, laid out as two rows of two
    html.Div([
        html.Div([
            dcc.Graph(id='radar-1960', style={'width': '50%'}),
            dcc.Graph(id='radar-1980', style={'width': '50%'})
        ], style={'display': 'flex'}),
        html.Div([
            dcc.Graph(id='radar-2000', style={'width': '50%'}),
            dcc.Graph(id='radar-2020', style={'width': '50%'})
        ], style={'display': 'flex'})
    ]),

    # Feature Multi-Select Dropdown
    html.Div([
        dcc.Dropdown(
            id='feature-dropdown',
            options=[{'label': f.title(), 'value': f} for f in features],
            value=features,  # default: all selected
            multi=True,
            placeholder="Select features...",
            style={'width': '100%', 'maxWidth': '1000px', 'margin': '0 auto'}
        )
    ], style={'textAlign': 'center', 'marginTop': '20px'}),

    # Regression and Moving Average Graphs
    dcc.Graph(id='regression-graph'),
    dcc.Graph(id='moving-avg-graph')
])


In [None]:
# Update selected genres from dropdown
@app.callback(
    Output('selected-genres-store', 'data'),
    Input('genre-dropdown', 'value')
)
def update_selected_genres(selected):
    """Mirror the genre dropdown's selection into the shared store.

    Args:
        selected: Current dropdown value; a list of genre names, possibly
            empty. A cleared dropdown may report ``None`` instead.

    Returns:
        The selection as a list. ``None`` is replaced by an empty list so
        that consumers of the store can always iterate over the value.
    """
    return selected or []

@app.callback(
    Output('genre-trend', 'figure'),
    [Input('selected-genres-store', 'data'),
     Input('year-range-slider', 'value')]
)
def update_genre_trend(selected_genres, year_range):
    """Draw the stacked-area chart of songs per genre per year.

    Args:
        selected_genres: Genre names to include (``None`` is treated as empty).
        year_range: ``[start_year, end_year]``, both inclusive.

    Returns:
        A stacked-area figure with one series per selected genre, or a figure
        carrying a "No data" annotation when nothing matches the selection.
    """
    selected_genres = list(selected_genres or [])
    start_year, end_year = year_range
    filtered = df_filtered[
        (df_filtered['main_genres'].isin(selected_genres)) &
        (df_filtered['year'] >= start_year) & (df_filtered['year'] <= end_year)
    ]

    # Nothing selected, or no songs in the window: there is nothing to pivot
    if filtered.empty:
        empty_fig = go.Figure().add_annotation(text="No data", showarrow=False)
        empty_fig.update_layout(title="Genre Popularity Over Time")
        return empty_fig

    counts = filtered.groupby(['year', 'main_genres']).size().reset_index(name='count')
    # A selected genre without any song in the window gets no column from the
    # pivot, and px.area rejects y names that are not columns. Reindex so that
    # every selected genre is present, filled with zero counts.
    pivot_df = (
        counts
        .pivot_table(index='year', columns='main_genres', values='count', aggfunc='sum')
        .reindex(columns=selected_genres, fill_value=0)
        .fillna(0)
        .reset_index()
    )

    fig = px.area(
        pivot_df, x='year', y=selected_genres,
        title="Genre Popularity Over Time",
        color_discrete_map={
            'rock': '#1f77b4', 'pop': '#ff7f0e', 'soul': '#2ca02c',
            'r&b': '#d62728', 'hip hop/rap': '#9467bd', 'country': '#8c564b'
        }
    )

    fig.update_layout(
        legend_title="Genres",
        xaxis_title="Year",
        yaxis_title="Number of Songs",
        hovermode="x unified",
        margin=dict(l=60, r=40, t=60, b=40),
        xaxis=dict(range=[pivot_df['year'].min() - 1, pivot_df['year'].max()])
    )
    return fig

@app.callback(
    [Output('radar-1960', 'figure'),
     Output('radar-1980', 'figure'),
     Output('radar-2000', 'figure'),
     Output('radar-2020', 'figure')],
    [Input('selected-genres-store', 'data'),
     Input('year-range-slider', 'value')]
)
def update_radar_charts(selected_genres, year_range):
    """Rebuild the four decade radar charts for the current filters.

    Args:
        selected_genres: Genre names to include (``None`` is treated as empty).
        year_range: ``[start_year, end_year]``, both inclusive.

    Returns:
        Tuple of four figures for the 1960s, 1980s, 2000s and 2020s. A decade
        without matching tracks gets a figure with a "No data" annotation.
    """
    start_year, end_year = year_range
    wanted = set(selected_genres or [])

    # The genre and year filters are identical for every decade, so apply them
    # once instead of re-running the per-row genre check four times.
    in_scope = df[
        (df['year'] >= start_year) & (df['year'] <= end_year) &
        (df['main_genres'].apply(lambda genres: not wanted.isdisjoint(genres)))
    ]

    def create_radar(decade):
        """Radar chart of the mean feature values of one decade's tracks."""
        filtered = in_scope[in_scope['decade'] == decade]
        if filtered.empty:
            return go.Figure().add_annotation(text="No data", showarrow=False)

        # Rounded once; the same values are plotted and shown as point labels
        avg = filtered[features].mean().round(2)
        data = pd.DataFrame({'feature': features, 'value': avg, 'text': avg})

        fig = px.line_polar(data, r='value', theta='feature', text='text', line_close=True, title=f'{decade}s Features')
        fig.update_traces(mode="lines+markers+text", textposition="top center", textfont_size=8, line=dict(width=2))
        fig.update_layout(polar=dict(radialaxis=dict(range=[0, 1], showticklabels=False)), showlegend=False)
        return fig

    return create_radar(1960), create_radar(1980), create_radar(2000), create_radar(2020)

@app.callback(
    [Output('regression-graph', 'figure'),
     Output('moving-avg-graph', 'figure')],
    [Input('selected-genres-store', 'data'),
     Input('feature-dropdown', 'value'),
     Input('year-range-slider', 'value')]
)
def update_trend_graphs(selected_genres, selected_features, year_range):
    """Rebuild the regression and moving-average figures for the current filters.

    Args:
        selected_genres: Genre names to include (``None`` is treated as empty).
        selected_features: Feature columns to plot (``None`` is treated as empty).
        year_range: ``[start_year, end_year]``, both inclusive.

    Returns:
        ``(regression_fig, moving_avg_fig)``. When no track matches the
        filters, both figures carry a "No data" annotation and no traces.
    """
    start_year, end_year = year_range
    wanted_genres = set(selected_genres or [])
    selected_features = selected_features or []

    filtered_df = df[
        df['main_genres'].apply(lambda genres: not wanted_genres.isdisjoint(genres)) &
        (df['year'] >= start_year) & (df['year'] <= end_year)
    ]

    regression_fig = go.Figure()
    moving_avg_fig = go.Figure()
    regression_fig.update_layout(title='Linear Regression Trends')
    moving_avg_fig.update_layout(title='5-Year Moving Averages')

    # Without any rows there is nothing to aggregate or fit a line through
    if filtered_df.empty:
        for empty_fig in (regression_fig, moving_avg_fig):
            empty_fig.add_annotation(text="No data", showarrow=False)
        return regression_fig, moving_avg_fig

    palette = px.colors.qualitative.Plotly
    for i, feature in enumerate(selected_features):
        trend_data = apply_regression(filtered_df, feature)
        # Cycle through the palette when more features than colours are selected
        color = palette[i % len(palette)]

        regression_fig.add_trace(go.Scatter(x=trend_data['year'], y=trend_data[feature],
                                            mode='markers', name=f'{feature} Data', marker=dict(color=color)))
        regression_fig.add_trace(go.Scatter(x=trend_data['year'], y=trend_data['trend'],
                                            mode='lines', name=f'{feature} Trend', line=dict(color=color)))

        moving_avg_fig.add_trace(go.Scatter(x=trend_data['year'], y=trend_data['moving_avg'],
                                            mode='lines', name=f'{feature} MA', line=dict(color=color)))

    return regression_fig, moving_avg_fig

In [None]:
# Run App
# Start the Dash server only when this code runs as the main module (not when
# it is imported from elsewhere); debug=True is passed through to app.run.
if __name__ == '__main__':
    app.run(debug=True)

<IPython.core.display.Javascript object>