# System C: TV Show Critics

1. View 1: Popularity vs. Ratings
Visualization Technique: Scatter Plot
Purpose: Show the relationship between TV show popularity and ratings.
Brushing & Linking: Selection in this view will filter other views based on selected popularity or ratings thresholds.

2. View 2: Genre Breakdown
Visualization Technique: Treemap
Purpose: Show the breakdown of TV show genres in a hierarchical structure.
Brushing & Linking: Selection in this view will filter other views based on the selected genre.

3. View 3: Season-wise Ratings
Visualization Technique: Heatmap
Purpose: Show the average rating of TV shows grouped by their number of seasons.
Brushing & Linking: Selection in this view will filter other views based on the selected number of seasons.

4. View 4: Network Comparison
Visualization Technique: Bubble chart
Purpose: Compare TV show networks across metrics such as ratings and popularity.
Brushing & Linking: Selection in this view will filter other views based on the selected network.

In [13]:
import altair as alt
import pandas as pd
from IPython.display import display

# Load the TMDB TV-show dataset.
# NOTE(review): this is an absolute, machine-specific Windows path; update it to the location
# of your local copy of the CSV before running the cell on another machine.
file_path = r'C:\Users\aadit\Downloads\Documents\Information Visualization\TMDB_tv_dataset_v3.csv'

# Keep only the rows that have no missing value in any column. dropna() returns a new
# frame, so no in-place mutation through an alias is needed; cvd_data still ends up
# holding the cleaned, un-sampled data.
cvd_data = pd.read_csv(file_path).dropna()

# Draw a reproducible (fixed seed) random sample of at most 500 shows. Capping n at the
# number of available rows avoids the ValueError that sample() raises when n is larger
# than the population and replace=False.
tv_shows_data = cvd_data.sample(n=min(500, len(cvd_data)), random_state=40)

# Altair's 'json' data transformer writes each chart's data to a JSON file instead of
# embedding it in the chart specification.
alt.data_transformers.enable('json')

# Last expression of the cell: the notebook renders the first two sampled rows.
tv_shows_data.head(2)

Unnamed: 0,id,name,number_of_seasons,number_of_episodes,original_language,vote_count,vote_average,overview,adult,backdrop_path,...,tagline,genres,created_by,languages,networks,origin_country,spoken_languages,production_companies,production_countries,episode_run_time
885,73544,Warrior,3,30,en,460,8.027,"A gritty, action-packed crime drama set during...",False,/aXZmDoGywrHlv2OIvurlCiTXkXv.jpg,...,This isn't China. This is Chinatown.,"Crime, Drama, Action & Adventure",Jonathan Tropper,"en, zh","Cinemax, Max",US,"English, 普通话","Perfect Storm Entertainment, Bruce Lee Enterta...",United States of America,55
1245,63522,F is for Family,5,44,en,315,7.344,"Follow the Murphy family back to the 1970s, wh...",False,/bdntcURJ4dSr7zoLeyB6ZGrMynD.jpg,...,Kick 'em in the cul-de-sac.,"Comedy, Animation","Bill Burr, Michael Price",en,Netflix,US,English,"Wild West Television, Gaumont International Te...","France, United States of America",26


In [14]:
# View 1: Popularity vs. Ratings
# Scatter plot with one square per show: popularity on the x axis, rating
# (`vote_average`) on the y axis. The rating is also mapped to colour so high- and
# low-rated shows stand out at a glance.
# NOTE: the variable keeps its original name (`scatter_square_chart_system_a`) so that
# any other cell referring to it continues to work.
scatter_square_chart_system_a = alt.Chart(tv_shows_data).mark_square(size=100).encode(
    x=alt.X('popularity:Q', title='Popularity'),
    y=alt.Y('vote_average:Q', title='Rating (Vote Average)'),
    color=alt.Color('vote_average:Q', scale=alt.Scale(scheme='viridis'), title='Rating (Vote Average)'),
    tooltip=['name', 'popularity', 'vote_average']
).properties(
    title='Popularity vs. Ratings'
).interactive()  # enables pan and zoom on the chart

display(scatter_square_chart_system_a)


In [15]:
# View 2: Genre Breakdown
# Counts shows per genre label, keeping the 10 most frequent labels and grouping every
# other label under 'Others'. A label is the full string in the `genres` column, so a
# show tagged with several genres (e.g. "Comedy, Animation") is counted under that
# combined string.

# Preserve the raw genre strings the first time this cell runs. Without the backup,
# re-running the cell would rank the already-collapsed labels (including 'Others') and
# fold one more genre into 'Others' on every run.
if 'genres_raw' not in tv_shows_data.columns:
    tv_shows_data['genres_raw'] = tv_shows_data['genres']

top_genres = tv_shows_data['genres_raw'].value_counts().nlargest(10).index.tolist()

# Keep a label when it is one of the top 10, otherwise replace it with 'Others'.
# `genres` is still overwritten, exactly as before, so other cells see the same column.
tv_shows_data['genres'] = tv_shows_data['genres_raw'].where(
    tv_shows_data['genres_raw'].isin(top_genres), 'Others'
)

# One rectangle per genre label; its height is the number of shows carrying that label.
genre_breakdown_chart = alt.Chart(tv_shows_data).transform_aggregate(
    count='count()',
    groupby=['genres']
).mark_rect().encode(
    x=alt.X('genres:N', title='Genre'),
    y=alt.Y('count:Q', title='Number of Shows'),
    color=alt.Color('genres:N', legend=None),
    tooltip=['genres:N', 'count:Q']
).properties(
    title='Genre Breakdown of TV Shows',
    width=850,
    height=300
).interactive()

display(genre_breakdown_chart)

In [16]:
# View 3: Season-wise Ratings
# For every distinct number of seasons, draw a rectangle whose height and colour both
# encode the mean `vote_average` of the shows with that many seasons.
seasons_axis = alt.X('number_of_seasons:O', title='Number of Seasons')
mean_rating_axis = alt.Y('mean(vote_average):Q', title='Mean Vote Average')
mean_rating_color = alt.Color(
    'mean(vote_average):Q',
    legend=alt.Legend(title='Mean Vote Average'),
)
season_tooltip = [
    'number_of_seasons:O',
    alt.Tooltip('mean(vote_average)', title='Mean Vote Average'),
]

season_ratings_chart = (
    alt.Chart(tv_shows_data)
    .mark_rect()
    .encode(
        x=seasons_axis,
        y=mean_rating_axis,
        color=mean_rating_color,
        tooltip=season_tooltip,
    )
    .properties(title='Season-wise Ratings of TV Shows', width=600, height=300)
    .interactive()  # enables pan and zoom on the chart
)

display(season_ratings_chart)

In [25]:
# View 4: Network Comparison
# Bubble chart with one circle per network: vertical position and colour encode the mean
# `vote_average` of its shows, bubble size encodes how many shows it has.

# Restrict the data to the 20 most frequent values of the `networks` column. A value is
# the full string in that column, so a show listed on several networks
# (e.g. "Cinemax, Max") is counted under that combined string.
top_20_networks = tv_shows_data['networks'].value_counts().nlargest(20).index.tolist()
filtered_tv_shows_data = tv_shows_data[tv_shows_data['networks'].isin(top_20_networks)]

# The frame above already contains only the top 20 networks, so no additional
# chart-level filter is needed.
network_comparison_bubble_chart = alt.Chart(filtered_tv_shows_data).mark_circle().encode(
    x=alt.X('networks:N', title='Network'),
    # zero=False lets the axis start near the lowest mean instead of at 0
    y=alt.Y('mean(vote_average):Q', title='Mean Vote Average', scale=alt.Scale(zero=False)),
    size=alt.Size('count():Q', legend=None, scale=alt.Scale(range=[100, 1000])),  # bubble area grows with the number of shows
    color=alt.Color('mean(vote_average):Q', title='Mean Vote Average', scale=alt.Scale(scheme='viridis')),
    tooltip=[
        'networks:N',
        alt.Tooltip('mean(vote_average)', title='Mean Vote Average'),
        # The size legend is hidden, so expose the count behind each bubble here.
        alt.Tooltip('count():Q', title='Number of Shows'),
    ]
).properties(
    title='Top 20 Network Comparison of TV Shows (Bubble Chart)',
    width=850,
    height=400
).interactive()

display(network_comparison_bubble_chart)


