### ALS (Alternating Least Squares) - recommendation engine

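Reference notebook (served by a local Jupyter instance, so the link only resolves on the machine hosting it): http://localhost:8888/notebooks/Mastering-Big-Data-Analytics-with-PySpark/Section%204%20-%20Machine%20Learning%20with%20Spark%20MLlib/4.3/hands-on-4.3.ipynb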
        

In [None]:
import os

from pyspark.sql import SparkSession
from pyspark.sql import functions as f

In [5]:
from pyspark.ml.recommendation import ALS

In [1]:
# Create the Spark session for this notebook, or reuse one that already exists.
spark = (
    SparkSession.builder
    .appName("Chapter4-3")
    .getOrCreate()
)

In [2]:
# Root directory of the course data sets. It can be overridden by setting the
# DATASET_HOME environment variable; when that variable is unset, the original
# path is used so the existing setup keeps working unchanged.
DATASET_HOME = os.environ.get(
    "DATASET_HOME",
    "/home/wengong/projects/bigdata/Mastering-Big-Data-Analytics-with-PySpark/data-sets",
)
FILE_CSV = "ratings.csv"

# Load the ratings CSV with an explicit schema (no schema inference pass).
# The `timestamp` column is dropped because the model fitted below only reads
# userId, movieId and rating. The result is cached because the same DataFrame
# is reused by the following cells (count/show, ALS fit, transform).
ratings = (
    spark.read.csv(
        path=f"{DATASET_HOME}/ml-latest-small/{FILE_CSV}",
        sep=",",
        header=True,
        quote='"',
        schema="userId INT, movieId INT, rating DOUBLE, timestamp INT",
    )
    .drop("timestamp")
    .cache()
)

In [4]:
# Sanity check of the load: total number of rating rows, then a small sample.
n_ratings = ratings.count()
print(n_ratings)
ratings.show(n=5)

100836
+------+-------+------+
|userId|movieId|rating|
+------+-------+------+
|     1|      1|   4.0|
|     1|      3|   4.0|
|     1|      6|   4.0|
|     1|     47|   5.0|
|     1|     50|   5.0|
+------+-------+------+
only showing top 5 rows



In [6]:
# Fixed seed so the random initialisation of the latent factors does not
# change from one kernel session to the next.
ALS_SEED = 42

# ALS estimator over the (userId, movieId, rating) triples; every other
# hyperparameter is left at the library default.
als = ALS(
    userCol="userId",
    itemCol="movieId",
    ratingCol="rating",
    seed=ALS_SEED,
)

# Fit the factorization on the full ratings DataFrame.
model = als.fit(ratings)

In [7]:
# Score the same ratings the model was fitted on: `transform` appends a
# `prediction` column next to the observed `rating`.
predictions = model.transform(ratings)
predictions.show(n=10, truncate=False)

+------+-------+------+----------+
|userId|movieId|rating|prediction|
+------+-------+------+----------+
|191   |148    |5.0   |4.9248567 |
|133   |471    |4.0   |3.2572494 |
|597   |471    |2.0   |3.6570044 |
|385   |471    |4.0   |3.4311826 |
|436   |471    |3.0   |3.31421   |
|602   |471    |4.0   |3.5073528 |
|91    |471    |1.0   |2.467201  |
|409   |471    |3.0   |3.706752  |
|372   |471    |3.0   |3.2248528 |
|599   |471    |2.5   |2.973057  |
+------+-------+------+----------+
only showing top 10 rows



In [8]:
# Inspect the learned per-user latent factor vectors (first 5 rows, untruncated).
model.userFactors.show(n=5, truncate=False)

+---+----------------------------------------------------------------------------------------------------------------------------+
|id |features                                                                                                                    |
+---+----------------------------------------------------------------------------------------------------------------------------+
|10 |[-0.69687176, -0.6618054, 0.38519007, 0.4278401, -0.09611353, 1.4546747, 0.64653957, -0.14659446, 0.7070822, 0.58935237]    |
|20 |[-0.48073864, 0.06653299, 0.13992442, 0.28669077, 0.0070511946, 1.2487173, 1.2842805, 0.59803575, 0.0060777753, -0.83264667]|
|30 |[0.020974442, -0.47584122, -0.3984823, 0.23272066, 0.0872139, 1.4422855, 0.83542436, 0.90527105, 0.52816486, -0.48924994]   |
|40 |[-0.96753615, -0.671791, -0.24671002, 0.09230446, 0.41576803, 0.9055569, 0.940425, 0.9956637, -0.29421473, -0.45743206]     |
|50 |[-0.5733293, -0.03722297, 0.10560249, 0.14409132, 0.6271811, 0.59021753, 0.441

In [9]:
# Inspect the learned per-item (movie) latent factor vectors (first 5 rows, untruncated).
model.itemFactors.show(n=5, truncate=False)

+---+--------------------------------------------------------------------------------------------------------------------------+
|id |features                                                                                                                  |
+---+--------------------------------------------------------------------------------------------------------------------------+
|10 |[-0.48427957, -0.4518795, -0.6895879, 0.07755869, 0.058386512, 1.4977772, 0.59792244, 0.83899456, 0.3995762, -0.26254237] |
|20 |[-0.5258346, -0.3530948, -0.26316857, -0.34943056, 0.09309245, 1.526627, 0.14786619, -0.08517679, 0.27492762, -0.71944493]|
|30 |[-0.6527545, 0.1622217, 1.1696619, -0.2798836, 1.1958413, 0.5888025, 0.26203138, 0.9677523, -0.3373769, -0.617144]        |
|40 |[-0.4743504, -0.53137994, -0.071397506, -0.5132354, 0.41302308, 1.3406467, 0.31028253, 1.1619114, 0.78377295, 0.41432565] |
|50 |[-0.6279297, -0.50679433, -0.021975808, 0.20652792, 0.53963834, 1.6165197, 0.60580397, 1.159