In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
from math import sqrt
# A dictionary of movie critics and their ratings of a small set of movies
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {'Snakes on a Plane': 4.5, 'You, Me and Dupree': 1.0,
             'Superman Returns': 4.0},
}

## Exercise 1

In [2]:
def sim_tanimoto(prefs, p1, p2):
    '''
    Returns the Tanimoto correlation coefficient for p1 and p2.
    '''
    # Get the list of mutually rated items
    si = {}
    for item in prefs[p1]:
        if item in prefs[p2]:
            si[item] = 1
    # If they are no ratings in common, return 0
    if len(si) == 0:
        return 0
    # Sums of the squares
    sum1Sq = sum([pow(prefs[p1][it], 2) for it in si])
    sum2Sq = sum([pow(prefs[p2][it], 2) for it in si])
    # Sum of the products
    pSum = sum([prefs[p1][it] * prefs[p2][it] for it in si])
    return pSum/(sum1Sq + sum2Sq - pSum)

In [3]:
print(f"sim_tanimoto(critics, 'Lisa Rose', 'Gene Seymour')={sim_tanimoto(critics, 'Lisa Rose', 'Gene Seymour')}")

sim_tanimoto(critics, 'Lisa Rose', 'Gene Seymour')=0.9118773946360154


## Exercise 3 & Tanimoto

In [4]:
def sim_pearson(prefs, p1, p2):
    '''
    Returns the Pearson correlation coefficient for p1 and p2.
    '''
    # Get the list of mutually rated items
    si = {}
    for item in prefs[p1]:
        if item in prefs[p2]:
            si[item] = 1
    # If they are no ratings in common, return 0
    if len(si) == 0:
        return 0
    # Sum calculations
    n = len(si)
    # Sums of all the preferences
    sum1 = sum([prefs[p1][it] for it in si])
    sum2 = sum([prefs[p2][it] for it in si])
    # Sums of the squares
    sum1Sq = sum([pow(prefs[p1][it], 2) for it in si])
    sum2Sq = sum([pow(prefs[p2][it], 2) for it in si])
    # Sum of the products
    pSum = sum([prefs[p1][it] * prefs[p2][it] for it in si])
    # Calculate r (Pearson score)
    num = pSum - sum1 * sum2 / n
    den = sqrt((sum1Sq - pow(sum1, 2) / n) * (sum2Sq - pow(sum2, 2) / n))
    if den == 0:
        return 0
    r = num / den
    return r

In [5]:
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    '''
    Returns the best matches for person from the prefs dictionary. 
    Number of results and similarity function are optional params.
    '''
    scores = [
        (similarity(prefs, person, other), other) 
            for other in prefs if other != person
    ]
    scores.sort()
    scores.reverse()
    return scores[0:n]

In [6]:
def getRecommendations(prefs, person, similarity=sim_pearson):
    '''
    Gets recommendations for a person by using a weighted average
    of every other user's rankings
    '''
    bestPerson = []
    for score, person_name in topMatches(prefs, person):
        bestPerson.append(person_name)

    totals = {}
    simSums = {}
    for other in prefs:
    # Don't compare me to myself
        if other == person and other not in bestPerson:
            continue
        sim = similarity(prefs, person, other)
        # Ignore scores of zero or lower
        if sim <= 0:
            continue
        for item in prefs[other]:
            # Only score movies I haven't seen yet
            if item not in prefs[person] or prefs[person][item] == 0:
                # Similarity * Score
                totals.setdefault(item, 0)
                # The final score is calculated by multiplying each item by the
                #   similarity and adding these products together
                totals[item] += prefs[other][item] * sim
                # Sum of similarities
                simSums.setdefault(item, 0)
                simSums[item] += sim
    # Create the normalized list
    rankings = [(total / simSums[item], item) for (item, total) in
                totals.items()]
    # Return the sorted list
    rankings.sort()
    rankings.reverse()
    return rankings

In [7]:
print(f"getRecommendations(critics, 'Toby'):")
for x in getRecommendations(critics,'Toby'):
    print(f"    {x}")
print(f"getRecommendations(critics, 'Toby', sim_tanimoto):")
for x in getRecommendations(critics,'Toby',sim_tanimoto):
    print(f"    {x}")

getRecommendations(critics, 'Toby'):
    (3.3477895267131017, 'The Night Listener')
    (2.8325499182641614, 'Lady in the Water')
    (2.530980703765565, 'Just My Luck')
getRecommendations(critics, 'Toby', sim_tanimoto):
    (3.4282420752089244, 'The Night Listener')
    (2.794551430377765, 'Lady in the Water')
    (2.392128726188535, 'Just My Luck')


## Exercise 5

In [8]:
import requests
import json
import pandas as pd


API_KEY = '9e7b1c4f54fab0216f46974b23efb8af'
USER_AGENT = 'Nrmnatxt'


def lastfm_get(payload):
    headers = {'user-agent': USER_AGENT}
    url = 'http://ws.audioscrobbler.com/2.0/'
    payload['api_key'] = API_KEY
    payload['format'] = 'json'
    response = requests.get(url, headers=headers, params=payload)
    return response


r = lastfm_get({
    'method': 'track.getsimilar',
    'artist': 'cher',
    'track':  'believe',
})


similartracks = pd.json_normalize(r.json()['similartracks']['track'])
similartracks = similartracks[['name', 'artist.name', 'match']]
print(similartracks.head(10))

                                         name      artist.name     match
0                               Strong Enough             Cher  1.000000
1                              All or Nothing             Cher  0.829831
2                                       Vogue          Madonna  0.452853
3                                     Hung Up          Madonna  0.411874
4                Can't Get You Out of My Head    Kylie Minogue  0.399843
5  I Wanna Dance with Somebody (Who Loves Me)  Whitney Houston  0.337167
6                       ...Baby One More Time   Britney Spears  0.331045
7                                 Bad Romance        Lady Gaga  0.327215
8                                     Wannabe      Spice Girls  0.316490
9                         Waiting for Tonight   Jennifer Lopez  0.307430
