In [1]:
from dash import dcc
from dash import html
from dash import Dash, dcc, html
import pandas as pd
from dash.dependencies import Input, Output

This project takes free public data from Kaggle to produce three interactive graphs on streaming platforms using Dash. These graphs were chosen for their ease of interpretation, their relevance to business analytics, and their ability to show change over time. The dashboard focuses on data visualization rather than web programming, and the design of the app is intentionally simple for ease of access.

### Data Transformation

Loading, cleaning, and transforming the data

In [2]:
# Movie titles available on the streaming platforms

# Load the CSV (first column is the index), order by release year, and drop
# the columns the dashboard does not use.
movies = pd.read_csv('movies_streaming_platforms.csv', header=0, index_col=0)
movies = movies.sort_values(by='Year').drop(columns=['ID', 'Type'])

# Strip the '/100' suffix from the Rotten Tomatoes text and convert to numbers
movies['Rotten Tomatoes'] = pd.to_numeric(
    movies['Rotten Tomatoes'].str.replace('/100', '')
)

# Average Rotten Tomatoes score per release year
yearly_movie_scores = movies.groupby('Year')['Rotten Tomatoes'].mean()
mean_ratings = yearly_movie_scores.reset_index()

# Number of movies released per year, ordered by year
movie_counts = (
    movies['Year']
    .value_counts()
    .rename_axis('Year')
    .reset_index(name='Count')
    .sort_values('Year')
)

# How many movies fall under each age rating
age_distribution = (
    movies['Age']
    .value_counts()
    .rename_axis('Age')
    .reset_index(name='Count')
)

# Streaming platforms
platforms = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

In [3]:
# TV show titles available on the streaming platforms

# Load the CSV (first column is the index), order by release year, and drop
# the columns the dashboard does not use.
tv = pd.read_csv('tv_shows.csv', header=0, index_col=0)
tv = tv.sort_values(by='Year').drop(columns=['ID', 'IMDb', 'Type'])

# Strip the '/100' suffix from the Rotten Tomatoes text and convert to numbers
tv['Rotten Tomatoes'] = pd.to_numeric(
    tv['Rotten Tomatoes'].str.replace('/100', '')
)

# Average Rotten Tomatoes score per release year
yearly_tv_scores = tv.groupby('Year')['Rotten Tomatoes'].mean()
mean_ratings_tv = yearly_tv_scores.reset_index()

# Number of TV shows released per year, ordered by year
tv_counts = (
    tv['Year']
    .value_counts()
    .rename_axis('Year')
    .reset_index(name='Count')
    .sort_values('Year')
)

# How many TV shows fall under each age rating
age_distribution_tv = (
    tv['Age']
    .value_counts()
    .rename_axis('Age')
    .reset_index(name='Count')
)

In [4]:
# Label every row with its media kind so the two tables can be told apart
# once they are stacked: 'Movie' for the movies frame, 'TV' for the TV frame.
for frame, media_kind in ((movies, 'Movie'), (tv, 'TV')):
    frame['Type'] = media_kind

# Stack movies on top of TV shows and renumber the rows from zero
combined_df = pd.concat((movies, tv), ignore_index=True)

### Dashboard Backend

The backend for the layout of the page, the callback functions that support interactive components, and the interactively updating graphs.

In [6]:
# Stylesheets from an external website

# A single <link rel="stylesheet"> entry pointing at the Lato font
# (weights 400 and 700) served by Google Fonts.
external_stylesheets = [
    dict(
        href="https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap",
        rel="stylesheet",
    ),
]

# Layout of the page, grid space determined using style.css

app = Dash(__name__, external_stylesheets=external_stylesheets)
app.title = "Streaming Analytics: Getting To Know Your Streaming Platforms"

# The charts are drawn from combined_df (movies AND TV shows), so the default
# date range must span both tables; taking it from `movies` alone would hide
# any TV show released outside the movie year span. The values are cast to
# plain ints so the layout carries ordinary Python numbers instead of NumPy
# scalars.
first_year = int(combined_df["Year"].min())
last_year = int(combined_df["Year"].max())

# Dropdown entries are generated from the `platforms` list so the menu cannot
# drift out of sync with the platform columns used for filtering.
platform_options = [{"label": "All", "value": "All"}] + [
    {"label": platform_name, "value": platform_name}
    for platform_name in platforms
]

app.layout = html.Div(
    children=[
        # Header: page title and short usage instructions
        html.Div(
            children=[
                html.H1(
                    children="Streaming Platform Analytics",
                    className="header-title",
                ),
                html.P(
                    children=(
                        "Select from drop down bars the streaming platform or form of media"
                    ),
                    className="header-description",
                ),
                html.P(
                    children=(
                        "Choose the start and end date in years through the date range filter"
                    ),
                    className="header-description",
                ),
            ],
            className="header",
        ),
        # Menu: platform dropdown, media-type dropdown, and year range inputs
        html.Div(
            children=[
                html.Div(
                    children=[
                        html.Div(
                            children="Platform",
                            className="menu-title"
                        ),
                        dcc.Dropdown(
                            id="platform-filter",
                            options=platform_options,
                            value="All",
                            clearable=False,
                            className="dropdown"
                        ),
                    ]
                ),
                html.Div(
                    children=[
                        html.Div(
                            children="Type",
                            className="menu-title"
                        ),
                        dcc.Dropdown(
                            id="type-filter",
                            options=[
                                {"label": "All", "value": "All"},
                                {"label": "Movie", "value": "Movie"},
                                {"label": "TV", "value": "TV"}
                            ],
                            value="All",
                            clearable=False,
                            className="dropdown"
                        ),
                    ]
                ),
                html.Div(
                    children=[
                        html.Div(
                            children="Date Range", className="menu-title"
                        ),
                        html.Div(
                            children=[
                                "Start: ",
                                dcc.Input(
                                    id="start-date",
                                    type='number',
                                    value=first_year
                                ),
                            ]
                        ),
                        html.Div(
                            children=[
                                "End: ",
                                dcc.Input(
                                    id="end-date",
                                    type='number',
                                    value=last_year
                                ),
                            ]
                        ),
                    ]
                ),
            ],
            className="menu",
        ),
        # Charts: the three graphs whose figures are filled in by the callback
        html.Div(
            children=[
                html.Div(
                    children=dcc.Graph(
                        id="rating-chart",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
                html.Div(
                    children=dcc.Graph(
                        id="volume-chart",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
                html.Div(
                    children=dcc.Graph(
                        id="age-pie-chart",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
            ],
            className="wrapper",
        ),
    ]
)

# Callback functions for the graphs and filters (dropdown bars and date range)

# Title of the volume chart for each value of the "Type" dropdown, so the
# heading matches what is actually being counted.
_VOLUME_TITLES = {
    "All": "Number of Titles Per Year",
    "Movie": "Number of Movies Per Year",
    "TV": "Number of TV Shows Per Year",
}


def _line_figure(x_values, y_values, title):
    """Return a figure dict for a one-series chart with a left-aligned title."""
    return {
        "data": [
            {
                "x": x_values,
                "y": y_values,
                # "lines" is not one of Plotly's trace types; line charts are
                # "scatter" traces. `mode` is left unset on purpose so that a
                # selection containing a single year is still drawn.
                "type": "scatter",
            },
        ],
        "layout": {
            "title": {
                "text": title,
                "x": 0.05,
                "xanchor": "left",
            },
            "xaxis": {
                "fixedrange": True,
                "showgrid": True,
            },
            "yaxis": {
                "fixedrange": True,
                "showgrid": True,
            },
        },
    }


# Interactive graphs created using the callback functions
@app.callback(
    Output("rating-chart", "figure"),
    Output("volume-chart", "figure"),
    Output("age-pie-chart", "figure"),
    Input("platform-filter", "value"),
    Input("type-filter", "value"),
    Input("start-date", "value"),
    Input("end-date", "value"),
)
def update_charts(platform, type_filter, start_year, end_year):
    """Rebuild the three figures from the current filter selections.

    Args:
        platform: "All", or the name of a platform column in ``combined_df``;
            rows are kept when that column equals 1.
        type_filter: "All", or a value of the "Type" column ("Movie" / "TV").
        start_year: Lowest release year to keep (inclusive). ``None`` (the
            year input is empty) means no lower bound.
        end_year: Highest release year to keep (inclusive). ``None`` means no
            upper bound.

    Returns:
        A tuple ``(rating_chart_figure, volume_chart_figure, pie_chart_figure)``
        of figure dicts, in the same order as the callback outputs.
    """
    # Start from "keep every row" and narrow the mask one filter at a time.
    selected = pd.Series(True, index=combined_df.index)
    if platform != "All":
        selected &= combined_df[platform] == 1
    if type_filter != "All":
        selected &= combined_df["Type"] == type_filter
    # An emptied number input is passed in as None; comparing the Year column
    # against None would raise, so a missing bound is simply not applied.
    if start_year is not None:
        selected &= combined_df["Year"] >= start_year
    if end_year is not None:
        selected &= combined_df["Year"] <= end_year
    filtered_data = combined_df[selected]

    # Group by 'Year' and calculate average ratings
    mean_ratings = filtered_data.groupby('Year')['Rotten Tomatoes'].mean().reset_index()

    # Count number of titles per year, ordered by year for the line chart
    title_counts = filtered_data['Year'].value_counts().reset_index()
    title_counts.columns = ['Year', 'Count']
    title_counts = title_counts.sort_values('Year')

    # Calculate the distribution of age ratings
    age_distribution = filtered_data['Age'].value_counts().reset_index()
    age_distribution.columns = ['Age', 'Count']

    # Create figures for each plot
    rating_chart_figure = _line_figure(
        mean_ratings["Year"],
        mean_ratings['Rotten Tomatoes'],
        "Average Rating on Rotten Tomatoes",
    )

    volume_chart_figure = _line_figure(
        title_counts["Year"],
        title_counts["Count"],
        _VOLUME_TITLES.get(type_filter, _VOLUME_TITLES["All"]),
    )

    pie_chart_figure = {
        "data": [
            {
                "labels": age_distribution["Age"],
                "values": age_distribution["Count"],
                "type": "pie",
                "hole": 0.4,
            },
        ],
        "layout": {
            "title": {
                "text": "Age Rating Distribution",
                "x": 0.05,
                "xanchor": "left",
            },
            "colorway": ["#17b897", "#E12D39", "#396285", "#da3b46"],
        },
    }

    return rating_chart_figure, volume_chart_figure, pie_chart_figure

if __name__ == "__main__":
    # NOTE(review): newer Dash releases document `app.run()` as the entry
    # point; confirm the installed version before switching.
    app.run_server()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
Press CTRL+C to quit
127.0.0.1 - - [30/May/2023 18:28:10] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2023 18:28:10] "GET /assets/style.css?m=1685433081.668877 HTTP/1.1" 304 -
127.0.0.1 - - [30/May/2023 18:28:10] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2023 18:28:10] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [30/May/2023 18:28:11] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [30/May/2023 18:28:11] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [30/May/2023 18:28:11] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
127.0.0.1 - - [30/May/2023 18:28:11] "POST /_dash-update-component HTTP/1.1" 200 -


### References

Real Python article by Dylan Castillo:

https://realpython.com/python-dash/

TV Show dataset by Ruchi Bhatia:

https://www.kaggle.com/datasets/ruchi798/tv-shows-on-netflix-prime-video-hulu-and-disney

Movies dataset by Ruchi Bhatia:

https://www.kaggle.com/datasets/ruchi798/movies-on-netflix-prime-video-hulu-and-disney