# MovieLens 数据分析任务

In [None]:
import pandas as pd

# Load the two MovieLens CSV files from the current working directory.
movies_df = pd.read_csv("movies.csv")
ratings_df = pd.read_csv("ratings.csv")

# Join every rating row to the columns of the movie it refers to, matching
# on the shared "movieId" column (inner join, the default for merge).
merged_df = ratings_df.merge(movies_df, on="movieId")


## 任务一：平均得分前10的电影

In [None]:
# Mean rating per movie, highest first, limited to the ten best rows.
#
# Ties on the mean are broken by ascending movieId. A single-column
# sort_values uses a non-stable algorithm by default, so without an explicit
# tie-breaker the rows selected among movies with equal means are not
# guaranteed to be reproducible.
#
# NOTE(review): no minimum number of ratings is required here, so a movie
# with a single rating can rank first — confirm this is intended.
top10_avg_rated = (
    merged_df.groupby(['movieId', 'title'])['rating']
    .mean()
    .reset_index()
    .sort_values(by=['rating', 'movieId'], ascending=[False, True])
    .head(10)
)
top10_avg_rated


## 任务二：每个类型的平均得分前10的电影

In [None]:
# Build a frame with one row per (rating, genre): the "genres" string is
# split on "|" into a list, then exploded so that a movie carrying several
# genres contributes its ratings to each of them. merged_df is left untouched.
genre_expanded_df = merged_df.assign(
    genres=merged_df['genres'].str.split('|')
).explode('genres')

# Mean rating of each movie inside each genre it is listed under.
genre_movie_avg = (
    genre_expanded_df
    .groupby(['genres', 'movieId', 'title'])['rating']
    .mean()
    .reset_index()
)

# Order by genre (A-Z) and by mean rating (best first) within a genre, then
# keep the first ten rows of every genre group.
top10_by_genre = (
    genre_movie_avg
    .sort_values(['genres', 'rating'], ascending=[True, False])
    .groupby('genres')
    .head(10)
)
top10_by_genre


## 任务三：每个用户评分最好的前5类型

In [None]:
# Mean rating each user gave within each genre, computed from the
# one-row-per-genre frame genre_expanded_df.
user_genre_rating = (
    genre_expanded_df
    .groupby(['userId', 'genres'])['rating']
    .mean()
    .reset_index()
)

# Sort by user, best-rated genre first, and keep the first five rows of
# every user's group.
top5_genres_by_user_rating = (
    user_genre_rating
    .sort_values(['userId', 'rating'], ascending=[True, False])
    .groupby('userId')
    .head(5)
)
top5_genres_by_user_rating


## 任务四：每个用户观影次数最多的前5类型

In [None]:
# Number of non-null ratings each user has in each genre, stored in a
# "view_count" column; every rating row is treated as one viewing.
user_genre_count = (
    genre_expanded_df
    .groupby(['userId', 'genres'])['rating']
    .count()
    .reset_index(name='view_count')
)

# Sort by user, most-watched genre first, and keep the first five rows of
# every user's group.
top5_genres_by_user_views = (
    user_genre_count
    .sort_values(['userId', 'view_count'], ascending=[True, False])
    .groupby('userId')
    .head(5)
)
top5_genres_by_user_views
