In [2]:
import pandas as pd
import altair as alt

# Load the cleaned TMDB TV-show export into a DataFrame.
file_path = 'data/cleaned_TMDB_tv_dataset.csv'
tv_shows_data = pd.read_csv(file_path)

# Filter criteria, named once so they are easy to audit and adjust.
MAX_EPISODE_RUN_TIME = 100
ORIGIN_COUNTRY = 'US'
EARLIEST_FIRST_AIR_DATE = '2010-01-01'
MIN_VOTE_AVERAGE = 5

# Keep only the rows that satisfy every criterion.
# NOTE(review): an additional `episode_run_time <= 400` test used to sit next
# to the `<= 100` one; it could never exclude a row the tighter bound keeps,
# so it was removed. If a *lower* runtime bound was intended, add it here.
# NOTE(review): `first_air_date` is read as plain text (no date parsing in
# read_csv), so the comparison below is lexicographic; it orders correctly
# only if the column holds ISO 'YYYY-MM-DD' strings -- confirm.
rows_to_keep = (
    (tv_shows_data['episode_run_time'] <= MAX_EPISODE_RUN_TIME)
    & (tv_shows_data['origin_country'] == ORIGIN_COUNTRY)
    & (tv_shows_data['first_air_date'] >= EARLIEST_FIRST_AIR_DATE)
    & (tv_shows_data['vote_average'] >= MIN_VOTE_AVERAGE)
)
filtered_tv_shows_data = tv_shows_data[rows_to_keep]

# Task 1: Analyze TV show runtime and its relationship with popularity.
# One circle per show: x = episode runtime, y = vote average,
# colour = popularity, circle size = vote count. Pan/zoom is enabled.
runtime_popularity_chart = alt.Chart(filtered_tv_shows_data).mark_circle().encode(
    x='episode_run_time',
    y='vote_average',
    # `popularity` is presumably a continuous numeric score (TODO confirm
    # dtype), so it gets a sequential colour scheme; a categorical palette
    # such as 'category20' is meant for discrete classes and gives no
    # low-to-high ordering to read from the legend.
    color=alt.Color('popularity', scale=alt.Scale(scheme='viridis')),
    size='vote_count',
    tooltip=['name', 'episode_run_time', 'vote_average', 'vote_count', 'popularity'],
).interactive().properties(
    width=800,
    height=600,
    # The filter criteria go in a subtitle so the main title stays short.
    # The effective runtime bound of the filtered data is <= 100.
    title=alt.TitleParams(
        text='Interactive Exploration: Episode Runtime vs Vote Average vs Vote Count',
        subtitle='US TV Shows from 2010 with Runtime <= 100 and Vote Average >= 5',
    ),
)

# Scatter plot of vote count (x) against vote average (y), one circle per
# show, coloured by popularity. Pan/zoom is enabled via .interactive().
scatter_plot = alt.Chart(filtered_tv_shows_data).mark_circle().encode(
    x='vote_count',
    y='vote_average',
    # `popularity` is presumably a continuous numeric score (TODO confirm
    # dtype), so use a sequential scheme rather than the categorical
    # 'category20' palette, which is designed for discrete classes.
    color=alt.Color('popularity', scale=alt.Scale(scheme='viridis')),
    # Show the identifying fields on hover.
    tooltip=['name', 'vote_count', 'vote_average', 'popularity'],
).properties(
    width=400,
    height=400,
).interactive()

# Brush used to link the two views: dragging on the histogram selects a
# range of vote averages. It is restricted to the x channel because the
# y channel is an aggregate (count) and cannot be projected into a brush.
selection = alt.selection_interval(encodings=['x'])

# Histogram of the vote-average distribution (binned x, row count on y).
histogram_bars = alt.Chart(filtered_tv_shows_data).mark_bar().encode(
    x=alt.X('vote_average', bin=True),
    y='count()',
)

# Two layers: light-gray bars for all rows (this layer owns the brush), and
# steel-blue bars recomputed from only the brushed rows drawn on top. With
# no brush active the filter passes every row, so all bars appear steel blue.
vote_average_histogram = alt.layer(
    histogram_bars.encode(color=alt.value('lightgray')).add_params(selection),
    histogram_bars.encode(color=alt.value('steelblue')).transform_filter(selection),
).properties(
    width=400,
    height=200,
)

# Link the scatter plot and histogram: the scatter plot shows only the shows
# whose vote average falls inside the brushed range. The histogram keeps its
# binned x encoding; it is not re-encoded here.
linked_chart = scatter_plot.transform_filter(selection) & vote_average_histogram

# Display the linked pair next to the runtime/popularity chart.
linked_chart | runtime_popularity_chart
