# In [55]:
import pandas as pd
import altair as alt
# Switch Altair to its "vegafusion" data transformer for the charts below
alt.data_transformers.enable("vegafusion")

# Read the movie dataset and parse 'release_date' into pandas datetimes
data_path = "data/movie_dataset.csv"
df = pd.read_csv(data_path).assign(
    release_date=lambda frame: pd.to_datetime(frame['release_date'])
)

# Long-form copy of df: the 'genres' string is split on ', ' and every
# resulting piece gets its own row (all other columns are repeated)
df_genre_split = (
    df
    .assign(genres=lambda frame: frame['genres'].str.split(', '))
    .explode('genres')
)

# Scatter plot of revenue against release year (one circle per row of df).
# Revenue also drives the circle colour and the circle size; the tooltip
# lists title, revenue and release year.
scatter_revenue_year = (
    alt.Chart(df, title='Revenue vs. Year', width=600, height=400)
    .mark_circle()
    .encode(
        x=alt.X('year(release_date):T'),
        y=alt.Y('revenue:Q'),
        color=alt.Color('revenue:Q'),
        size=alt.Size('revenue:Q', scale=alt.Scale(range=[50, 500])),
        tooltip=[
            alt.Tooltip('title:N'),
            alt.Tooltip('revenue:Q'),
            alt.Tooltip('year(release_date):T'),
        ],
    )
)
# Line chart of mean revenue per release year, drawn from the exploded
# per-genre frame so that each genre gets its own coloured line
line_revenue_genre = (
    alt.Chart(df_genre_split)
    .mark_line()
    .encode(
        x=alt.X(
            field='release_date',
            timeUnit='year',
            type='temporal',
            axis=alt.Axis(title='Year'),
        ),
        y=alt.Y(
            field='revenue',
            aggregate='mean',
            type='quantitative',
            axis=alt.Axis(title='Mean Revenue'),
        ),
        color=alt.Color(
            field='genres',
            type='nominal',
            legend=alt.Legend(title='Genre'),
        ),
        tooltip=[
            alt.Tooltip('year(release_date):T'),
            alt.Tooltip('mean(revenue):Q'),
            alt.Tooltip('genres:N'),
        ],
    )
    .properties(title='Mean Revenue Over the Years by Genre', width=600, height=400)
)



# Horizontal bar chart of revenue per genre, with the genre axis ordered by
# descending x value and no colour legend (the y axis already names genres).
# NOTE(review): 'revenue' is encoded without an aggregate, so the chart gets
# one bar mark per exploded row instead of one pre-computed value per
# genre - confirm whether sum(revenue) or mean(revenue) was intended.
bar_revenue_genre = alt.Chart(
    df_genre_split,
    title='Revenue by Genre',
    width=600,
    height=400,
).mark_bar().encode(
    x=alt.X(
        field='revenue',
        type='quantitative',
        axis=alt.Axis(title='Revenue'),
    ),
    y=alt.Y(
        field='genres',
        type='nominal',
        sort='-x',
        axis=alt.Axis(title='Genre'),
    ),
    color=alt.Color(field='genres', type='nominal', legend=None),
    tooltip=[alt.Tooltip('genres:N'), alt.Tooltip('revenue:Q')],
)

# Scatter plot of revenue against vote_average (the rating).
# Colour follows vote_average on the 'viridis' scheme with a legend titled
# 'Rating'; circle size follows revenue.
scatter_revenue_rating = (
    alt.Chart(df, title='Revenue vs. Rating', width=600, height=400)
    .mark_circle()
    .encode(
        x=alt.X('vote_average:Q'),
        y=alt.Y('revenue:Q'),
        color=alt.Color(
            field='vote_average',
            type='quantitative',
            scale=alt.Scale(scheme='viridis'),
            legend=alt.Legend(title='Rating'),
        ),
        size=alt.Size(
            field='revenue',
            type='quantitative',
            scale=alt.Scale(range=[50, 500]),
        ),
        tooltip=[
            alt.Tooltip('title:N'),
            alt.Tooltip('revenue:Q'),
            alt.Tooltip('vote_average:Q'),
        ],
    )
)

# Scatter plot of revenue against runtime (the movie's duration).
# Revenue is shown three ways: y position, 'viridis' colour and circle size.
scatter_revenue_duration = alt.Chart(
    df,
    title='Revenue vs. Duration',
    width=600,
    height=400,
).mark_circle().encode(
    x=alt.X(field='runtime', type='quantitative'),
    y=alt.Y(field='revenue', type='quantitative'),
    color=alt.Color(
        field='revenue',
        type='quantitative',
        scale=alt.Scale(scheme='viridis'),
    ),
    size=alt.Size(
        field='revenue',
        type='quantitative',
        scale=alt.Scale(range=[50, 500]),
    ),
    tooltip=[
        alt.Tooltip('title:N'),
        alt.Tooltip('revenue:Q'),
        alt.Tooltip('runtime:Q'),
    ],
)

# Display all five charts side by side.
# Concatenated charts share non-positional scales by default, which would
# merge the colour scales of unrelated fields (revenue, vote_average,
# genres) into one. Resolving colour independently lets every panel keep
# its own colour domain, scheme and legend.
(
    scatter_revenue_year
    | line_revenue_genre
    | bar_revenue_genre
    | scatter_revenue_rating
    | scatter_revenue_duration
).resolve_scale(color='independent')
