In [13]:
from __future__ import print_function
from pyspark import SparkContext
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating 

In [14]:
if __name__ == "__main__":
    sc = SparkContext.getOrCreate()
    data = sc.textFile("../Datasets/test.data")
    ratings = data.map(lambda l: l.split(','))\
        .map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
        
    # Build the recommendation model using Alternating Least Squares 
    rank = 10
    numIterations = 10
    model = ALS.train(ratings, rank, numIterations)
    
    # Evaluate the model on training data 
    testdata = ratings.map(lambda p: (p[0],p[1]))
    predictions = model.predictAll(testdata).map(lambda r : ((r[0],r[1],r[2])))
    ratesAndPreds = ratings.map(lambda r : ((r[0], r[1], r[2]))).join(predictions)
    
    print("pred")
    print(predictions.collect())
    
    print("rates and preds")
    print(ratesAndPreds.collect())
    
    MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
    print("Mean Squared Error = " + str(MSE))    

pred
[(1, 1, 4.997241980956678), (1, 2, 1.0012010408264338), (1, 3, 4.997241980956678), (1, 4, 1.0012010408264338), (2, 1, 4.997241980956678), (2, 2, 1.0012010408264338), (2, 3, 4.997241980956678), (2, 4, 1.0012010408264338), (3, 1, 1.0014843526093313), (3, 2, 4.995825517237339), (3, 3, 1.0014843526093313), (3, 4, 4.995825517237339), (4, 1, 1.0014843526093313), (4, 2, 4.995825517237339), (4, 3, 1.0014843526093313), (4, 4, 4.995825517237339)]
rates and preds
[(1, (1, 1)), (1, (1, 2)), (1, (1, 3)), (1, (1, 4)), (1, (2, 1)), (1, (2, 2)), (1, (2, 3)), (1, (2, 4)), (1, (3, 1)), (1, (3, 2)), (1, (3, 3)), (1, (3, 4)), (1, (4, 1)), (1, (4, 2)), (1, (4, 3)), (1, (4, 4)), (2, (1, 1)), (2, (1, 2)), (2, (1, 3)), (2, (1, 4)), (2, (2, 1)), (2, (2, 2)), (2, (2, 3)), (2, (2, 4)), (2, (3, 1)), (2, (3, 2)), (2, (3, 3)), (2, (3, 4)), (2, (4, 1)), (2, (4, 2)), (2, (4, 3)), (2, (4, 4)), (3, (1, 1)), (3, (1, 2)), (3, (1, 3)), (3, (1, 4)), (3, (2, 1)), (3, (2, 2)), (3, (2, 3)), (3, (2, 4)), (3, (3, 1)), (3, 

In [15]:
# Save and load model 
model.save(sc,"../MLlib/model/")
sameModel = MatrixFactorizationModel.load(sc,"../MLlib/model/")