In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# Load the ratings table. The path is relative, so it is resolved against the
# kernel's working directory.
df = pd.read_csv("../data/movie_ratings.csv")

# Store 'decade' as a nullable integer: values that cannot be parsed as
# numbers are coerced to missing (errors='coerce') instead of raising, and the
# 'Int64' dtype can hold those missing entries alongside whole numbers.
df['decade'] = pd.to_numeric(df['decade'], errors='coerce').astype('Int64')

# Preview is the cell's final expression so the notebook actually renders it
# (a bare expression in the middle of a cell is evaluated and discarded), and
# so it shows the converted 'decade' column.
df.head()

In [None]:
# Column names, dtypes and non-null counts. DataFrame.info() writes its report
# to stdout itself and returns None, so it is called bare: wrapping it in
# print() appends a stray "None" line to the output.
df.info()

# Summary statistics of the numeric columns.
print(df.describe())

# First few rows.
print(df.head())

In [None]:
# Minimum number of ratings a genre must have to appear in genre_stats.
MIN_GENRE_RATINGS = 1000

# One row per (rating, genre) pair: split the pipe-delimited 'genres' string
# into a list, then explode the list so every genre gets its own row.
# assign() returns a new frame, so df itself is left untouched.
df_genres = (
    df.assign(genres=df['genres'].str.split('|'))
    .explode('genres')
)

# Mean rating and number of ratings per genre, restricted to genres with at
# least MIN_GENRE_RATINGS ratings and ordered from highest to lowest mean.
genre_stats = (
    df_genres.groupby('genres')['rating']
    .agg(['mean', 'count'])
    .query('count >= @MIN_GENRE_RATINGS')
    .sort_values('mean', ascending=False)
    .reset_index()
)

# Preview of the exploded frame. It is the cell's final expression because a
# bare expression in the middle of a cell is evaluated but never rendered.
df_genres.head()

In [None]:
# Horizontal bar chart of mean rating per genre; bars follow the row order of
# genre_stats.
chart_1 = sns.barplot(data=genre_stats, y='genres', x='mean')
chart_1.set(xlabel="Average Rating", ylabel="Genres")

# Remove the top and right spines (despine's defaults) for a cleaner frame.
sns.despine()

# Text view of the leading rows of the table behind the chart.
print(genre_stats.head())

In [None]:
# Mean rating and number of ratings per decade, in ascending decade order.
# Rows whose 'decade' is missing are left out, because groupby drops missing
# keys by default.
decade_stats = (
    df.groupby('decade')['rating']
    .agg(['mean', 'count'])
    .reset_index()
    .sort_values('decade')
)

# Ratings count by decade
chart_2 = sns.barplot(data=decade_stats, x='decade', y='count')
chart_2.set_title("Ratings Count by Decade")
chart_2.set_xlabel("Decade")
# Trailing semicolon keeps the notebook from echoing the Text(...) object
# returned by set_ylabel as cell output.
chart_2.set_ylabel("Count");

In [None]:
# Mean rating per decade as a line, with a marker on every decade.
chart_3 = sns.lineplot(data=decade_stats, x='decade', y='mean', marker='o')
chart_3.set_title("Mean Rating by Decade")
chart_3.set_xlabel("Decade")
# Trailing semicolon keeps the notebook from echoing the Text(...) object
# returned by set_ylabel as cell output.
chart_3.set_ylabel("Mean Rating");

In [None]:
# Ten best-rated movies among those with at least 350 ratings. Ordering is by
# mean rating, then by rating count, both descending.
movie_stats = (
    df.groupby(['movie_id', 'title'])['rating']
    .agg(mean='mean', count='count')
    .loc[lambda stats: stats['count'] >= 350]
    .sort_values(by=['mean', 'count'], ascending=False)
    .iloc[:10]
    .reset_index()
)

# Horizontal bars, one per row of movie_stats; the figure is 4 inches tall and
# twice as wide as it is tall.
chart_4 = sns.catplot(data=movie_stats, kind='bar', x='mean', y='title', height=4, aspect=2)
chart_4.set_axis_labels("Average Rating", "Movie (Release Year)")

# Write each movie's rating count just past the end of its bar. The text is
# placed at y = row position, which assumes the bars are drawn in the same
# order as the rows of movie_stats.
for bar_pos, (avg_rating, n_ratings) in enumerate(
    movie_stats[['mean', 'count']].itertuples(index=False, name=None)
):
    chart_4.ax.text(avg_rating + 0.02, bar_pos, str(n_ratings), va='center')