# Movie Recommendations using MLlib (Alternating Least Squares)

In [1]:
# Bootstrap cell: findspark.init() is called before pyspark is imported.
# NOTE(review): presumably init() is what makes the local Spark install
# importable as `pyspark` -- confirm against the findspark documentation.
import findspark
findspark.init()

# Keep this import after findspark.init(); the statement order in this cell is deliberate.
import pyspark
# Last expression of the cell, so the notebook displays its return value
# (the Spark location shown in the cell output).
findspark.find()

'c:\\spark'

In [3]:
import sys
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS, Rating

def loadMovieNames(path="u.item", encoding="ISO-8859-1"):
    """Build a lookup table from movie ID to movie title.

    The file is read as pipe-delimited text; the first field of every line is
    the integer movie ID and the second field is the title. Any further fields
    are ignored.

    Args:
        path: Location of the item file. Defaults to "u.item" in the current
            working directory, which is what the notebook expects.
        encoding: Text encoding used to decode the file. ISO-8859-1 maps every
            byte to a character, so accented titles are kept intact instead of
            having their non-ASCII characters silently dropped.

    Returns:
        dict mapping int movie ID -> str title.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the first field of a non-blank line is not an integer.
    """
    movieNames = {}
    with open(path, encoding=encoding) as f:
        for line in f:
            # Strip the line terminator so it can never end up inside a title.
            fields = line.rstrip('\n').split('|')
            # Blank or truncated lines carry no (id, title) pair; skip them
            # rather than crash on int('') or a missing second field.
            if len(fields) < 2:
                continue
            movieNames[int(fields[0])] = fields[1]
    return movieNames

In [4]:
# Configure a Spark application named "MovieRecommendationsALS" with the
# "local[*]" master.
conf = SparkConf().setMaster("local[*]").setAppName("MovieRecommendationsALS")

# getOrCreate() returns the already-running context when this cell is executed
# again in the same kernel; calling the SparkContext constructor a second time
# would raise because only one context may be active per process.
# Note: when a context already exists, `conf` is not re-applied to it.
sc = SparkContext.getOrCreate(conf)

# Directory (relative to the working directory) where Spark writes RDD checkpoints.
sc.setCheckpointDir('checkpoint')

In [7]:
print("\nLoading movie names...")
nameDict = loadMovieNames()

# Each line of u.data becomes one element of the RDD.
data = sc.textFile("u.data")


def _lineToRating(line):
    """Turn one whitespace-separated line into a Rating(user, product, rating).

    Only the first three fields are used; anything after them is ignored.
    """
    parts = line.split()
    return Rating(int(parts[0]), int(parts[1]), float(parts[2]))


# Cached so the parsed ratings can be reused by later cells.
ratings = data.map(_lineToRating).cache()


Loading movie names...


In [8]:
# Build the recommendation model using Alternating Least Squares
print("\nTraining recommendation model...")

# Number of latent factors passed to ALS.train.
rank = 10

# Lowered numIterations to ensure it works on lower-end systems
numIterations = 6

# ALS starts from randomly initialised factors; pinning the seed keeps the
# trained model (and therefore the recommendations printed below) stable
# across Restart & Run All.
alsSeed = 42

model = ALS.train(ratings, rank, numIterations, seed=alsSeed)


Training recommendation model...


In [11]:
# User whose ratings and recommendations are shown; change this to inspect another user.
userID = 2

print("\nRatings for user ID {}:".format(userID))
# Keep only the ratings submitted by the selected user.
userRatings = ratings.filter(lambda r: r.user == userID)
for seen in userRatings.collect():
    title = nameDict[int(seen.product)]
    print("{}: {}".format(title, seen.rating))

print("\nTop 10 recommendations:")
# recommendProducts yields Rating entries; the rating field is the predicted score.
recommendations = model.recommendProducts(userID, 10)
for suggestion in recommendations:
    title = nameDict[int(suggestion.product)]
    print("{} score {}".format(title, suggestion.rating))


Ratings for user ID 2:
Rosewood (1997): 4.0
Shall We Dance? (1996): 5.0
Star Wars (1977): 5.0
3 Ninjas: High Noon At Mega Mountain (1998): 1.0
Ulee's Gold (1997): 4.0
Fierce Creatures (1997): 3.0
Midnight in the Garden of Good and Evil (1997): 3.0
River Wild, The (1994): 3.0
Mighty Aphrodite (1995): 4.0
Up Close and Personal (1996): 3.0
Ulee's Gold (1997): 4.0
FairyTale: A True Story (1997): 3.0
Devil's Advocate, The (1997): 3.0
Men in Black (1997): 4.0
As Good As It Gets (1997): 5.0
Apt Pupil (1998): 1.0
In & Out (1997): 4.0
Titanic (1997): 5.0
Once Upon a Time... When We Were Colored (1995): 4.0
Hoodlum (1997): 4.0
Face/Off (1997): 3.0
Antonia's Line (1995): 3.0
Restoration (1995): 4.0
Time to Kill, A (1996): 4.0
Truth About Cats & Dogs, The (1996): 4.0
Contact (1997): 3.0
Breakdown (1997): 4.0
Kolya (1996): 5.0
Emma (1996): 5.0
Leaving Las Vegas (1995): 4.0
Toy Story (1995): 4.0
Ice Storm, The (1997): 3.0
Postino, Il (1994): 4.0
Marvin's Room (1996): 3.0
Absolute Power (1997): 3.0
