# MovieLens 数据分析 - SQL 版本

## 假设表结构
- `movies(movieId INT, title TEXT, genres TEXT)`
- `ratings(userId INT, movieId INT, rating FLOAT, timestamp BIGINT)`

## 任务一：列出平均得分前10的电影

In [None]:
-- Task 1: the ten movies with the highest mean rating.
-- Groups by movieId (title is carried along only for display) so that two
-- distinct movies that happen to share a title are not averaged together.
-- Assumes movies.movieId identifies exactly one movie -- TODO confirm.
-- NOTE(review): no minimum rating count is enforced, so a movie with a single
-- top rating can lead the list; add HAVING COUNT(*) >= <n> if that is unwanted.
SELECT 
    m.title,
    AVG(r.rating) AS avg_rating
FROM 
    ratings r
JOIN 
    movies m ON r.movieId = m.movieId
GROUP BY 
    m.movieId,
    m.title
ORDER BY 
    avg_rating DESC,
    m.movieId  -- tie-breaker so LIMIT picks the same rows on every run
LIMIT 10;


## 任务二：每个类型的平均得分前10的电影（使用 PostgreSQL）

In [None]:
-- Task 2: for every genre, the ten movies with the highest mean rating.
-- PostgreSQL-specific: relies on string_to_array() and unnest().
WITH genre_expanded AS (
    -- Split the pipe-delimited genres string so each rating row is repeated
    -- once per genre of the rated movie.
    SELECT 
        r.rating,
        m.movieId,
        m.title,
        unnest(string_to_array(m.genres, '|')) AS genre
    FROM 
        ratings r
    JOIN 
        movies m ON r.movieId = m.movieId
),
movie_genre_avg AS (
    -- Mean rating per (genre, movie), ranked inside each genre.
    -- Grouping includes movieId so same-titled movies stay separate;
    -- movieId also breaks ties so the ranking is deterministic.
    SELECT 
        genre,
        movieId,
        title,
        AVG(rating) AS avg_rating,
        ROW_NUMBER() OVER (
            PARTITION BY genre
            ORDER BY AVG(rating) DESC, movieId
        ) AS genre_rank
    FROM 
        genre_expanded
    GROUP BY 
        genre, movieId, title
)
SELECT 
    genre,
    title,
    avg_rating
FROM 
    movie_genre_avg
WHERE 
    genre_rank <= 10  -- keep only the top ten of each genre
ORDER BY 
    genre, genre_rank;


## 任务三：每个用户评分最高的前5个电影类型（使用 PostgreSQL）

In [None]:
-- Task 3: for every user, the five genres with the highest mean rating
-- given by that user.
-- PostgreSQL-specific: relies on string_to_array() and unnest().
WITH genre_expanded AS (
    -- Repeat each rating once per genre of the rated movie.
    SELECT 
        r.userId,
        r.rating,
        unnest(string_to_array(m.genres, '|')) AS genre
    FROM 
        ratings r
    JOIN 
        movies m ON r.movieId = m.movieId
),
genre_avg AS (
    -- Mean rating per (user, genre), ranked within each user.
    -- genre is the secondary sort key so that ties on the average always
    -- resolve the same way and the rank-5 cut-off is reproducible.
    SELECT 
        userId,
        genre,
        AVG(rating) AS avg_rating,
        ROW_NUMBER() OVER (
            PARTITION BY userId
            ORDER BY AVG(rating) DESC, genre
        ) AS genre_rank
    FROM 
        genre_expanded
    GROUP BY 
        userId, genre
)
SELECT 
    userId, genre, avg_rating
FROM 
    genre_avg
WHERE 
    genre_rank <= 5
ORDER BY 
    userId, genre_rank;


## 任务四：每个用户观影次数最多的前5个电影类型（使用 PostgreSQL）

In [None]:
-- Task 4: for every user, the five genres they have rated most often.
-- A "view" is counted as one row in ratings; a multi-genre movie counts once
-- toward each of its genres.
-- PostgreSQL-specific: relies on string_to_array() and unnest().
WITH genre_expanded AS (
    -- One row per (rating, genre) pair.
    SELECT 
        r.userId,
        unnest(string_to_array(m.genres, '|')) AS genre
    FROM 
        ratings r
    JOIN 
        movies m ON r.movieId = m.movieId
),
genre_counts AS (
    -- Number of rated movies per (user, genre), ranked within each user.
    -- Equal counts are common, so genre is used as a tie-breaker to keep the
    -- rank-5 cut-off reproducible between runs.
    SELECT 
        userId,
        genre,
        COUNT(*) AS view_count,
        ROW_NUMBER() OVER (
            PARTITION BY userId
            ORDER BY COUNT(*) DESC, genre
        ) AS genre_rank
    FROM 
        genre_expanded
    GROUP BY 
        userId, genre
)
SELECT 
    userId, genre, view_count
FROM 
    genre_counts
WHERE 
    genre_rank <= 5
ORDER BY 
    userId, genre_rank;
