## Importing Libraries and loading data

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Example with TF-IDF Implementation with python (Mannually)

In [7]:
# Two simple documents containing one sentence each.

documentA = 'the man went out for a walk'
documentB = 'the children sat around the fire'

# splite sentence based on space
bowA = documentA.split(' ')
bowB = documentB.split(' ')

# find unique words in both the sentence
unique_words = set(bowA).union(set(bowB))

# create dictionary of words and their occurence for each document.
numOfWordsA = dict.fromkeys(unique_words, 0)
for word in bowA:
    numOfWordsA[word] += 1; 
    
# create dictionary of words and their occurence for each document.    
numOfWordsB = dict.fromkeys(unique_words, 0)    
for word in bowB:
    numOfWordsB[word] += 1;        

print(numOfWordsA)
print(numOfWordsB)


{'a': 1, 'fire': 0, 'around': 0, 'man': 1, 'the': 1, 'sat': 0, 'walk': 1, 'went': 1, 'for': 1, 'out': 1, 'children': 0}
{'a': 0, 'fire': 1, 'around': 1, 'man': 0, 'the': 2, 'sat': 1, 'walk': 0, 'went': 0, 'for': 0, 'out': 0, 'children': 1}


### Term Frequency (TF)

In [8]:
# compute TF
def computeTF(wordDict, bag_of_words):
    print(wordDict, bag_of_words)
    tfDict = {}
    bag_of_words_count = len(bag_of_words)
    print(bag_of_words_count)
    for word, count in wordDict.items():
        #print(word,count)
        tfDict[word] = count / float(bag_of_words_count)
    return tfDict        

In [9]:
tf_A = computeTF(numOfWordsA,bowA)
tf_B = computeTF(numOfWordsB,bowB)

print(tf_A)
print(tf_B)


{'a': 1, 'fire': 0, 'around': 0, 'man': 1, 'the': 1, 'sat': 0, 'walk': 1, 'went': 1, 'for': 1, 'out': 1, 'children': 0} ['the', 'man', 'went', 'out', 'for', 'a', 'walk']
7
{'a': 0, 'fire': 1, 'around': 1, 'man': 0, 'the': 2, 'sat': 1, 'walk': 0, 'went': 0, 'for': 0, 'out': 0, 'children': 1} ['the', 'children', 'sat', 'around', 'the', 'fire']
6
{'a': 0.14285714285714285, 'fire': 0.0, 'around': 0.0, 'man': 0.14285714285714285, 'the': 0.14285714285714285, 'sat': 0.0, 'walk': 0.14285714285714285, 'went': 0.14285714285714285, 'for': 0.14285714285714285, 'out': 0.14285714285714285, 'children': 0.0}
{'a': 0.0, 'fire': 0.16666666666666666, 'around': 0.16666666666666666, 'man': 0.0, 'the': 0.3333333333333333, 'sat': 0.16666666666666666, 'walk': 0.0, 'went': 0.0, 'for': 0.0, 'out': 0.0, 'children': 0.16666666666666666}


### Inverse Data Frequency (IDF)

In [10]:
def computeIDF(documents):
    import math
    N = len(documents)
    
    idfDict = dict.fromkeys(documents[0].keys(), 0)
    print(idfDict)
    for document in documents:
        for word, val in document.items():
            if val > 0:
                idfDict[word] += 1
    print(idfDict)           
    for word, val in idfDict.items():
        idfDict[word] = math.log(N / float(val))           
    
    return idfDict

In [11]:
idfs = computeIDF([numOfWordsA,numOfWordsB])
idfs

{'a': 0, 'fire': 0, 'around': 0, 'man': 0, 'the': 0, 'sat': 0, 'walk': 0, 'went': 0, 'for': 0, 'out': 0, 'children': 0}
{'a': 1, 'fire': 1, 'around': 1, 'man': 1, 'the': 2, 'sat': 1, 'walk': 1, 'went': 1, 'for': 1, 'out': 1, 'children': 1}


{'a': 0.6931471805599453,
 'fire': 0.6931471805599453,
 'around': 0.6931471805599453,
 'man': 0.6931471805599453,
 'the': 0.0,
 'sat': 0.6931471805599453,
 'walk': 0.6931471805599453,
 'went': 0.6931471805599453,
 'for': 0.6931471805599453,
 'out': 0.6931471805599453,
 'children': 0.6931471805599453}

### TF-IDF

In [12]:
def computeTFIDF(tf_bag_of_words, idfs):
    tfidf = {}
    for word, val in tf_bag_of_words.items():
        tfidf[word] = val * idfs[word]
    return tfidf    

In [13]:
# Compute the TF-IDF scores for all the words
tfidf_A = computeTFIDF(tf_A,idfs)
tfidf_B = computeTFIDF(tf_B,idfs)

df = pd.DataFrame([tfidf_A,tfidf_B])

df


Unnamed: 0,a,fire,around,man,the,sat,walk,went,for,out,children
0,0.099021,0.0,0.0,0.099021,0.0,0.0,0.099021,0.099021,0.099021,0.099021,0.0
1,0.0,0.115525,0.115525,0.0,0.0,0.115525,0.0,0.0,0.0,0.0,0.115525


#### Rather than manually implementing TF-IDF ourselves, we could use the class provided by sklearn. The values differ slightly because sklearn uses a smoothed version idf and various other little optimizations.

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [15]:
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform([documentA, documentB])
feature_names = vectorizer.get_feature_names()
print(feature_names)
dense = vectors.todense()
denselist = dense.tolist()
print(denselist)
df = pd.DataFrame(denselist, columns=feature_names)
df

['around', 'children', 'fire', 'for', 'man', 'out', 'sat', 'the', 'walk', 'went']
[[0.0, 0.0, 0.0, 0.42615959880289433, 0.42615959880289433, 0.42615959880289433, 0.0, 0.3032160644503863, 0.42615959880289433, 0.42615959880289433], [0.40740123733358447, 0.40740123733358447, 0.40740123733358447, 0.0, 0.0, 0.0, 0.40740123733358447, 0.5797386715376657, 0.0, 0.0]]


Unnamed: 0,around,children,fire,for,man,out,sat,the,walk,went
0,0.0,0.0,0.0,0.42616,0.42616,0.42616,0.0,0.303216,0.42616,0.42616
1,0.407401,0.407401,0.407401,0.0,0.0,0.0,0.407401,0.579739,0.0,0.0


In [16]:
# after applying some parameter value in TfidfVectorizer
vectorizer = TfidfVectorizer(analyzer='word',ngram_range=(1,2),min_df=0,stop_words='english')
vectors = vectorizer.fit_transform([documentA, documentB])
feature_names = vectorizer.get_feature_names()
print(feature_names)
dense = vectors.todense()
denselist = dense.tolist()
print(denselist)
df = pd.DataFrame(denselist, columns=feature_names)
df

['children', 'children sat', 'man', 'man went', 'sat', 'walk', 'went', 'went walk']
[[0.0, 0.0, 0.447213595499958, 0.447213595499958, 0.0, 0.447213595499958, 0.447213595499958, 0.447213595499958], [0.5773502691896257, 0.5773502691896257, 0.0, 0.0, 0.5773502691896257, 0.0, 0.0, 0.0]]


Unnamed: 0,children,children sat,man,man went,sat,walk,went,went walk
0,0.0,0.0,0.447214,0.447214,0.0,0.447214,0.447214,0.447214
1,0.57735,0.57735,0.0,0.0,0.57735,0.0,0.0,0.0


# Content based recommender 
A content based recommender works with data that the user provides, either explicitly (rating) or implicitly (clicking on a link). Based on that data, a user profile is generated, which is then used to make suggestions to the user. As the user provides more inputs or takes actions on the recommendations, the engine becomes more and more accurate.

In [17]:
#reading movie file
movie_df = pd.read_csv("movies.csv")

In [18]:
movie_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji,Adventure|Children|Fantasy
2,3,Grumpier Old Men,Comedy|Romance
3,4,Waiting to Exhale,Comedy|Drama|Romance
4,5,Father of the Bride Part II,Comedy


In [19]:
# Break up the big genre string into a string array
movie_df['genres'] = movie_df['genres'].str.split('|')

# convert genre to string value
movie_df['genres'] = movie_df['genres'].fillna("").astype('str')

In [20]:
movie_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story,"['Adventure', 'Animation', 'Children', 'Comedy..."
1,2,Jumanji,"['Adventure', 'Children', 'Fantasy']"
2,3,Grumpier Old Men,"['Comedy', 'Romance']"
3,4,Waiting to Exhale,"['Comedy', 'Drama', 'Romance']"
4,5,Father of the Bride Part II,['Comedy']


## Recommandation based on genre

In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word',ngram_range=(1,2),min_df=0,stop_words='english')
tfidf_matrix = tf.fit_transform(movie_df['genres'])
feature_names = tf.get_feature_names()
dense = tfidf_matrix.todense()
denselist = dense.tolist()
df = pd.DataFrame(denselist, columns=feature_names)
#tfidf_matrix.shape
df


Unnamed: 0,action,action adventure,action animation,action children,action comedy,action crime,action documentary,action drama,action fantasy,action horror,...,sci fi,thriller,thriller imax,thriller war,thriller western,war,war imax,war western,western,western imax
0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9737,0.238499,0.0,0.533576,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9738,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9739,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9740,0.353726,0.0,0.791366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
print(tfidf_matrix)

  (0, 63)	0.4051430286389587
  (0, 47)	0.3681884973089335
  (0, 34)	0.38369482677526473
  (0, 18)	0.4008862821540716
  (0, 108)	0.30254034715329503
  (0, 59)	0.16761357728391116
  (0, 46)	0.3162303113127544
  (0, 33)	0.32335863498874723
  (0, 17)	0.26110809240797916
  (1, 51)	0.5795995638728872
  (1, 19)	0.5337814180965866
  (1, 108)	0.36554429536140276
  (1, 46)	0.382085190978399
  (1, 17)	0.31548378439611124
  (2, 68)	0.7695974416123483
  (2, 160)	0.5242383036039113
  (2, 59)	0.36454626441402677
  (3, 103)	0.5645649298589199
  (3, 62)	0.5417511322516687
  (3, 96)	0.2904365851652309
  (3, 160)	0.4522400920963429
  (3, 59)	0.31447995130958456
  (4, 59)	1.0
  (5, 84)	0.604518892749723
  (5, 5)	0.5454388121871825
  :	:
  (9733, 38)	0.835677806885533
  (9733, 96)	0.23714974930952545
  (9733, 33)	0.495381266784903
  (9734, 62)	0.7846149876753742
  (9734, 96)	0.42063760299449465
  (9734, 59)	0.4554594691761476
  (9735, 33)	1.0
  (9736, 86)	1.0
  (9737, 2)	0.5335755137706529
  (9737, 35)	0.4

In [23]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(tfidf_matrix,tfidf_matrix)
cosine_sim[:4,:4]

array([[1.        , 0.31379419, 0.0611029 , 0.05271111],
       [0.31379419, 1.        , 0.        , 0.        ],
       [0.0611029 , 0.        , 1.        , 0.35172407],
       [0.05271111, 0.        , 0.35172407, 1.        ]])

In [24]:
print(cosine_sim)

[[1.         0.31379419 0.0611029  ... 0.         0.16123168 0.16761358]
 [0.31379419 1.         0.         ... 0.         0.         0.        ]
 [0.0611029  0.         1.         ... 0.         0.         0.36454626]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.16123168 0.         0.         ... 0.         1.         0.        ]
 [0.16761358 0.         0.36454626 ... 0.         0.         1.        ]]


In [25]:
# build a 1-dimesional array with movie title
titles= movie_df['title']
indices = pd.Series(movie_df.index, index=movie_df['title'])
indices

title
Toy Story                                 0
Jumanji                                   1
Grumpier Old Men                          2
Waiting to Exhale                         3
Father of the Bride Part II               4
                                       ... 
Black Butler: Book of the Atlantic     9737
No Game No Life: Zero                  9738
Flint                                  9739
Bungo Stray Dogs: Dead Apple           9740
Andrew Dice Clay: Dice Rules           9741
Length: 9742, dtype: int64

In [47]:
# Funtion that get movie recommendation based on the consine simillarity score of movie ganre

def genre_recommendation(title):
    idx = indices[title]
    print(idx);
    sim_scores = list(enumerate(cosine_sim[idx]))
    #print(sim_scores)
    sim_scores = sorted(sim_scores,key=lambda x:x[1], reverse=True)
    #print(sim_scores[10])
    sim_scores = sim_scores[1:21]
    movie_indices = [i[0] for i in sim_scores]
    print(movie_indices)
    return titles.iloc[movie_indices]

In [48]:
genre_recommendation('Toy Story ').head(10)

0
[1706, 2355, 2809, 3000, 3568, 6194, 6486, 6948, 7760, 8219, 8927, 9430, 5490, 6448, 8357, 3194, 7530, 7805, 7184, 7917]


1706                                                Antz 
2355                                         Toy Story 2 
2809             Adventures of Rocky and Bullwinkle, The 
3000                           Emperor's New Groove, The 
3568                                      Monsters, Inc. 
6194                                           Wild, The 
6486                                     Shrek the Third 
6948                             Tale of Despereaux, The 
7760    Asterix and the Vikings (Astérix et les Vikings) 
8219                                               Turbo 
Name: title, dtype: object

## Recommandation based on title

In [50]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word',ngram_range=(1,2),min_df=0,stop_words='english')
tfidf_matrix_title = tf.fit_transform(movie_df['title'])
tfidf_matrix_title.shape


(9742, 20413)

In [52]:
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim_title = cosine_similarity(tfidf_matrix_title,tfidf_matrix_title)
cosine_sim_title[:4,:4]

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [53]:
# build a 1-dimesional array with movie title
titles= movie_df['title']
indices = pd.Series(movie_df.index, index=movie_df['title'])
indices

title
Toy Story                                 0
Jumanji                                   1
Grumpier Old Men                          2
Waiting to Exhale                         3
Father of the Bride Part II               4
                                       ... 
Black Butler: Book of the Atlantic     9737
No Game No Life: Zero                  9738
Flint                                  9739
Bungo Stray Dogs: Dead Apple           9740
Andrew Dice Clay: Dice Rules           9741
Length: 9742, dtype: int64

In [56]:
# Funtion that get movie recommendation based on the consine simillarity score of movie titles

def title_recommendation(title):
    idx = indices[title]
    sim_scores_title = list(enumerate(cosine_sim_title[idx]))
    sim_scores_title = sorted(sim_scores_title,key=lambda x:x[1], reverse=True)
    sim_scores_title = sim_scores_title[1:21]
    movie_indices_title = [i[0] for i in sim_scores_title]
    return titles.iloc[movie_indices_title]

In [61]:
title_recommendation('Toy Story ').head(10)

2355           Toy Story 2 
7355           Toy Story 3 
3595              Toy, The 
1570            L.A. Story 
2227      Story of Us, The 
4089          Toy Soldiers 
3187            Love Story 
2110    Christmas Story, A 
4047           Ghost Story 
8736            True Story 
Name: title, dtype: object

# Collaborative Filtering

In [62]:
# library
import math
import operator

#Building Custom Data for Movie Rating
review = {
'Marlon Brando': {
'The Godfather': 5.00, 
'The Godfather Part II': 4.29,
'Apocalypse Now': 5.00, 
'Jaws': 1.
},
'Stephen King': {
'The Shawshank Redemption': 4.89, 
'The Shining': 4.93 , 
'The Green Mile': 4.87,
'The Godfather': 1.33,
},
'Steven Spielberg': {
'Raiders of the Lost Ark': 5.0, 
'Jaws': 4.89,
'Saving Private Ryan': 4.78, 
'Star Wars Episode IV - A New Hope': 4.33,
'Close Encounters of the Third Kind': 4.77,
'The Godfather':  1.25,
'The Godfather Part II': 1.72
},
'George Lucas':{
'Star Wars Episode IV - A New Hope': 5.00	
},
'Al Pacino': {
'The Godfather': 4.02, 
'The Godfather Part II': 5.00,
},
'Robert DeNiro': {
'The Godfather': 3.07, 
'The Godfather Part II': 4.29, 
'Raging Bull': 5.00, 
'Goodfellas':  4.89
},
'Robert Duvall': {
'The Godfather': 3.80, 
'The Godfather Part II': 3.61,
'Apocalypse Now': 4.26 
},
'Jack Nicholson': {
'The Shining': 5.0,
'One Flew Over The Cuckoos Nest': 5.0,
'The Godfather': 2.22,
'The Godfather Part II': 3.34
},
'Morgan Freeman': {
'The Shawshank Redemption': 4.98,
'The Shining': 4.42,
'Apocalypse Now': 1.63,
'The Godfather': 1.12,
'The Godfather Part II': 2.16
},
'Harrison Ford': {
'Raiders of the Lost Ark': 5.0, 
'Star Wars Episode IV - A New Hope': 4.84,
},
'Tom Hanks': {
'Saving Private Ryan': 3.78, 
'The Green Mile': 4.96,
'The Godfather': 1.04,
'The Godfather Part II': 1.03
},
'Francis Ford Coppola': {
'The Godfather': 5.00, 
'The Godfather Part II': 5.0, 
'Jaws': 1.24,
'One Flew Over The Cuckoos Nest': 2.02
},
'Martin Scorsese': {
'Raging Bull': 5.0, 
'Goodfellas': 4.87,
'Close Encounters of the Third Kind': 1.14,
'The Godfather': 4.00
},
'Diane Keaton': {
'The Godfather': 2.98,
'The Godfather Part II': 3.93,
'Close Encounters of the Third Kind': 1.37
},
'Richard Dreyfuss': {
'Jaws': 5.0, 
'Close Encounters of the Third Kind': 5.0,
'The Godfather': 1.07,
'The Godfather Part II': 0.63
},
'Joe Pesci': {
'Raging Bull': 4.89, 
'Goodfellas': 5.0,
'The Godfather': 4.87,
'Star Wars Episode IV - A New Hope': 1.32
}
}

In [66]:
# function to get common movie between users
def get_common_movies(critic_A, critic_B):
    return [movie for movie in review[critic_A] if movie in review[critic_B]]


In [67]:
get_common_movies('Marlon Brando','Robert DeNiro')

['The Godfather', 'The Godfather Part II']

In [68]:
get_common_movies('Steven Spielberg','Tom Hanks')

['Saving Private Ryan', 'The Godfather', 'The Godfather Part II']

In [69]:
# Function to get reviews from the common movies
def get_reviews(critic_A,critic_B):
    common_movies = get_common_movies(critic_A,critic_B)
    return [(review[critic_A][movie], review[critic_B][movie]) for movie in common_movies]

In [70]:
get_reviews('Marlon Brando','Robert DeNiro')

[(5.0, 3.07), (4.29, 4.29)]

In [71]:
get_reviews('Steven Spielberg','Tom Hanks')

[(4.78, 3.78), (1.25, 1.04), (1.72, 1.03)]

### Euclidean Distance Formula for Calculating similarity

In [72]:
# Function to get Euclidean Distance b/w 2 points 
def euclidean_distance(points):
    squared_diffs = [(point[0] - point[1]) ** 2 for point in points]
    summed_squared_diffs = sum(squared_diffs)
    distance = math.sqrt(summed_squared_diffs)
    return distance

In [73]:
# Function to  calculate similarity more similar less the distance and vice versa
# Added 1 for if highly similar can make the distance zero and give NotDefined Error
def similarity(reviews):
    return 1/ (1 + euclidean_distance(reviews))

In [74]:
# Function to get similarity b/w 2 users
def get_critic_similarity(critic_A, critic_B):
    reviews = get_reviews(critic_A,critic_B)
    return similarity(reviews)

In [75]:
get_critic_similarity('Marlon Brando','Robert DeNiro')

0.341296928327645

In [76]:
get_critic_similarity('Steven Spielberg','Tom Hanks')

0.4478352722730117

In [77]:
get_critic_similarity('Martin Scorsese','Joe Pesci')

0.5300793497254199

In [78]:
# Function to give recommendation to users based on their reviews.
def recommend_movies(critic, num_suggestions):
    similarity_scores = [(get_critic_similarity(critic, other), other) for other in review if other != critic]
    # Get similarity Scores for all the critics
    similarity_scores.sort() 
    similarity_scores.reverse()
    similarity_scores = similarity_scores[0:num_suggestions]

    recommendations = {}
    # Dictionary to store recommendations
    for similarity, other in similarity_scores:
        reviewed = review[other]
        # Storing the review
        for movie in reviewed:
            if movie not in review[critic]:
                weight = similarity * reviewed[movie]
                # Weighing similarity with review
                if movie in recommendations:
                    sim, weights = recommendations[movie]
                    recommendations[movie] = (sim + similarity, weights + [weight])
                    # Similarity of movie along with weight
                else:
                    recommendations[movie] = (similarity, [weight])
                    

    for recommendation in recommendations:
        similarity, movie = recommendations[recommendation]
        recommendations[recommendation] = sum(movie) / similarity
        # Normalizing weights with similarity

    sorted_recommendations = sorted(recommendations.items(), key=operator.itemgetter(1), reverse=True)
    #Sorting recommendations with weight
    return sorted_recommendations

In [81]:
recommend_movies('Marlon Brando',5)

[('Raiders of the Lost Ark', 5.0),
 ('Goodfellas', 4.953067092651758),
 ('Raging Bull', 4.9297124600638975),
 ('Star Wars Episode IV - A New Hope', 3.8157055214723923),
 ('One Flew Over The Cuckoos Nest', 2.02),
 ('Close Encounters of the Third Kind', 1.14)]

In [80]:
recommend_movies('Robert DeNiro',3)

[('Raiders of the Lost Ark', 5.0),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('Close Encounters of the Third Kind', 1.37)]

In [82]:
recommend_movies('Steven Spielberg',4)

[('The Shawshank Redemption', 4.928285762244913),
 ('The Green Mile', 4.87),
 ('The Shining', 4.71304734727882),
 ('Apocalypse Now', 1.63)]

In [84]:
recommend_movies('Tom Hanks',2)

[('Raiders of the Lost Ark', 5.0), ('Star Wars Episode IV - A New Hope', 4.92)]

In [85]:
recommend_movies('Joe Pesci',4)

[('Apocalypse Now', 5.000000000000001),
 ('The Godfather Part II', 4.7280538302277435),
 ('One Flew Over The Cuckoos Nest', 2.02),
 ('Close Encounters of the Third Kind', 1.14),
 ('Jaws', 1.12)]