In [None]:
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.4.0.tar.gz (310.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.4.0-py2.py3-none-any.whl size=311317145 sha256=9a45c774b68f45aad48c00b45354df1055053e61d43bf32efc50e8d9410eb353
  Stored in directory: /root/.cache/pip/wheels/9f/34/a4/159aa12d0a510d5ff7c8f0220abbea42e5d81ecf588c4fd884
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.0


In [None]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType

In [None]:
# Start a Spark session named "ALSModel", or reuse one that is already active.
spark = (
    SparkSession.builder
    .appName("ALSModel")
    .getOrCreate()
)

In [None]:
# Explicit schema for the ratings file: four nullable columns, in file order.
schema = StructType(
    [
        StructField(column_name, column_type, True)
        for column_name, column_type in (
            ("user_id", IntegerType()),
            ("movie_id", IntegerType()),
            ("rating", DoubleType()),
            ("timestamp", IntegerType()),
        )
    ]
)

In [None]:
# Read the "::"-delimited ratings file into a DataFrame using the explicit schema.
ratings_df = (
    spark.read
    .schema(schema)
    .option("sep", "::")
    .csv("/content/ratings.dat")
)

In [None]:
# Randomly split the ratings 60% / 40% into training and testing sets (fixed seed).
training_df, testing_df = ratings_df.randomSplit(weights=[0.6, 0.4], seed=40)

In [None]:
# Configure the ALS (alternating least squares) recommender on the explicit ratings.
# coldStartStrategy="drop" discards rows whose prediction would be NaN (users or
# movies not seen during training), so the evaluation metric stays well-defined.
# The seed is fixed because the factor matrices are initialised randomly; without it
# the fitted model, and therefore the reported error, can differ between runs.
als = ALS(
    maxIter=10,
    regParam=0.01,
    userCol="user_id",
    itemCol="movie_id",
    ratingCol="rating",
    coldStartStrategy="drop",
    seed=40,
)

# Fit the factorisation model on the training split only.
model = als.fit(training_df)

In [None]:
# Score the held-out split; the result adds a "prediction" column to each rating row.
predictions = model.transform(dataset=testing_df)

In [None]:
# Compare predicted and actual ratings using mean squared error.
evaluator = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="mse",
)
mse = evaluator.evaluate(predictions)

# Report the metric.
print("Mean Squared Error (MSE):", mse)

# Shut down the Spark session; cells that use `spark` need it recreated afterwards.
spark.stop()

Mean Squared Error (MSE): 0.8665604056821231
