## Recommending Movies

In [2]:
# Sample data set: maps each critic's name to a dictionary of
# {movie title: rating} for the movies that critic has rated.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}


from math import sqrt

# Euclidean-distance-based similarity between two people's ratings
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two entries of prefs.

    prefs maps a name to a dictionary of {item: rating}. Only items rated
    by both person1 and person2 are compared.

    Returns 0 when the two have no items in common; otherwise
    1 / (1 + d), where d is the Euclidean distance between their ratings
    of the shared items (so identical ratings give 1.0).
    """
    # Items rated by both, in the iteration order of person1's ratings.
    shared = [movie for movie in prefs[person1] if movie in prefs[person2]]
    if not shared:
        return 0

    squared_gaps = [
        pow(prefs[person1][movie] - prefs[person2][movie], 2)
        for movie in shared
    ]
    distance = sqrt(sum(squared_gaps))
    return 1 / (1.0 + distance)

    
# Example: distance-based similarity between two of the critics.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(sim_distance(critics, "Gene Seymour", "Toby"))
    

item =  Snakes on a Plane
item =  Superman Returns
item =  You, Me and Dupree
0.258245699761


In [10]:
# Pearson correlation score
#https://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient

def sim_pearson_distance(prefs, person1, person2):
    """Return the Pearson correlation coefficient of two entries of prefs.

    prefs maps a name to a dictionary of {item: rating}. The coefficient is
    computed over the items rated by both person1 and person2.

    Returns 0 when there are no shared items, or when the denominator is
    not positive (for example when one side gave every shared item the
    same rating); otherwise returns the correlation as a float.
    """
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    if not shared:
        return 0

    # Use a float count so the divisions below are true divisions even
    # under Python 2 with integer ratings (where int / int truncates).
    n = float(len(shared))

    ratings1 = [prefs[person1][item] for item in shared]
    ratings2 = [prefs[person2][item] for item in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)

    sq_sum1 = sum([pow(rating, 2) for rating in ratings1])
    sq_sum2 = sum([pow(rating, 2) for rating in ratings2])

    product_sum = sum([a * b for a, b in zip(ratings1, ratings2)])

    num = product_sum - sum1 * sum2 / n
    den_squared = (sq_sum1 - pow(sum1, 2) / n) * (sq_sum2 - pow(sum2, 2) / n)

    # Floating-point rounding can push this product to zero or slightly
    # below it; sqrt() would raise ValueError on a negative value, so treat
    # both cases as "no measurable correlation".
    if den_squared <= 0:
        return 0

    return num / sqrt(den_squared)
  
    
# Example: Pearson correlation between two of the critics.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(sim_pearson_distance(critics, "Gene Seymour", "Lisa Rose"))

0.396059017191


In [14]:
def top_matches(prefs, person, n = 5, similarity = sim_pearson_distance):
    """Return the n entries of prefs most similar to person.

    Every other key of prefs is scored with similarity(prefs, person, other).
    The result is a list of (score, name) tuples ordered from highest to
    lowest, truncated to the first n.
    """
    ranked = []
    for candidate in prefs:
        if candidate != person:
            ranked.append((similarity(prefs, person, candidate), candidate))

    # Ascending sort on (score, name), then flip so the best match is first.
    ranked.sort()
    return ranked[::-1][0:n]

# Example: the three critics whose ratings are most similar to Toby's.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(top_matches(critics, "Toby", 3))


[(0.9912407071619299, 'Lisa Rose'), (0.9244734516419049, 'Mick LaSalle'), (0.8934051474415647, 'Claudia Puig')]


In [20]:
def get_recommendations(prefs, person, similarity = sim_pearson_distance):
    """Rank the items person has not rated, using similarity-weighted ratings.

    For each other key of prefs whose similarity to person is positive,
    that entry's ratings are weighted by the similarity. Only items that
    person has not rated (or has rated 0) are considered. Each item's score
    is the weighted rating total divided by the sum of the similarities
    that contributed to it.

    Returns a list of (score, item) tuples ordered from highest to lowest.
    """
    weighted_totals = {}
    weight_sums = {}

    for other in prefs:
        if other == person:
            continue

        weight = similarity(prefs, person, other)
        if weight <= 0:
            # Zero or negative similarity: this entry does not contribute.
            continue

        for item in prefs[other]:
            if item not in prefs[person] or prefs[person][item] == 0:
                weighted_totals[item] = weighted_totals.get(item, 0) + prefs[other][item] * weight
                weight_sums[item] = weight_sums.get(item, 0) + weight

    ranked = []
    for item, total in weighted_totals.items():
        ranked.append((total / weight_sums[item], item))

    # Ascending sort on (score, item), then flip so the best score is first.
    ranked.sort()
    ranked.reverse()
    return ranked

# Example: movies Toby has not rated, ranked by predicted rating.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(get_recommendations(critics, "Toby"))

                

[(3.3477895267131013, 'The Night Listener'), (2.8325499182641614, 'Lady in the Water'), (2.5309807037655645, 'Just My Luck')]


## Matching Products

In [30]:
def transform_prefs(prefs):
    """Swap the two key levels of a nested ratings dictionary.

    Given {person: {item: rating}}, return {item: {person: rating}}.
    Entries whose rating is None are left out of the result.
    """
    result = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            # Identity check: `== None` would call the rating's own __eq__,
            # which can wrongly report a real value as missing.
            if rating is None:
                continue

            result.setdefault(item, {})[person] = rating

    return result

# Flip the data to {movie: {critic: rating}} so the same functions can be
# applied with movies in the role of "people".
movies = transform_prefs(critics)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(get_recommendations(movies, "Just My Luck"))


[(4.0, 'Michael Phillips'), (3.0, 'Jack Matthews')]
