In [1]:
from pyspark.sql import SparkSession

In [2]:
# Build the Spark session used by every later cell; getOrCreate() hands back
# an already-running session instead of starting a second one.
spark = (
    SparkSession.builder
    .appName("MoviesAnalysis")
    .getOrCreate()
)

In [3]:
# Echo the session object as the cell's result to confirm it was created.
spark

In [4]:
# Load movies.csv: the first row supplies the column names, and column types
# are inferred from the data rather than declared up front.
csv_reader = spark.read.options(header=True, inferSchema=True)
movies_df = csv_reader.csv("movies.csv")

In [5]:
# Preview the data: up to 20 rows, with long strings cut to 20 characters
# (these are show()'s defaults, spelled out here for the reader).
movies_df.show(n=20, truncate=True)

+-------+--------------------+-------+------+-------+
|MovieID|               Title|  Genre|Rating|  Votes|
+-------+--------------------+-------+------+-------+
|      1|The Shawshank Red...|  Drama|   9.3|2345678|
|      2|       The Godfather|  Crime|   9.2|1657452|
|      3|     The Dark Knight| Action|   9.0|2456789|
|      4|        Pulp Fiction|  Crime|   8.9|1894321|
|      5|The Lord of the R...|Fantasy|   8.8|1298456|
|      6|        Forrest Gump|  Drama|   8.8|1658345|
|      7|           Inception| Sci-Fi|   8.7|2089765|
|      8|          Fight Club|  Drama|   8.8|1734567|
|      9|          The Matrix| Sci-Fi|   8.7|2001453|
|     10|          Goodfellas|  Crime|   8.7|1423567|
+-------+--------------------+-------+------+-------+



In [6]:
# Register the DataFrame as the temporary view "movies" so the spark.sql(...)
# queries in the following cells can refer to it by name in FROM clauses.
movies_df.createOrReplaceTempView("movies")

In [7]:
# Three best-rated genres by mean rating (rounded to two decimals).
# The secondary sort on Genre makes the cut-off deterministic: with LIMIT 3,
# genres whose rounded averages tie would otherwise be kept or dropped in an
# unspecified order from run to run.
top_genres = spark.sql("""
    SELECT Genre, ROUND(AVG(Rating), 2) AS AvgRating
    FROM movies
    GROUP BY Genre
    ORDER BY AvgRating DESC, Genre ASC
    LIMIT 3
""")

top_genres.show()

+------+---------+
| Genre|AvgRating|
+------+---------+
|Action|      9.0|
| Drama|     8.97|
| Crime|     8.93|
+------+---------+



In [8]:
# Single movie with the highest vote count.
# Title is used as a tie-breaker so that, if two movies share the top vote
# count, LIMIT 1 always returns the same row instead of an arbitrary one.
most_voted_movie = spark.sql("""
    SELECT Title, Votes
    FROM movies
    ORDER BY Votes DESC, Title ASC
    LIMIT 1
""")

most_voted_movie.show()

+---------------+-------+
|          Title|  Votes|
+---------------+-------+
|The Dark Knight|2456789|
+---------------+-------+



In [9]:
# Count how many movies fall into each genre, largest groups first.
# NOTE(review): the query sorts on MovieCount only, so the relative order of
# genres with equal counts is not pinned down by the ORDER BY clause.
genre_count_query = """
    SELECT Genre, COUNT(*) AS MovieCount
    FROM movies
    GROUP BY Genre
    ORDER BY MovieCount DESC
"""
movie_count_by_genre = spark.sql(genre_count_query)

movie_count_by_genre.show()

+-------+----------+
|  Genre|MovieCount|
+-------+----------+
|  Crime|         3|
|  Drama|         3|
| Sci-Fi|         2|
|Fantasy|         1|
| Action|         1|
+-------+----------+

