In [1]:
import pandas as pd
# Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Load the pre-processed LeetCode problem table (path is relative to the
# notebook's working directory). low_memory=False makes pandas parse the file
# in a single pass instead of in chunks.
data = pd.read_csv(
    '../leetcode_data_processed.csv',
    low_memory=False,
)

In [3]:
# Rows without a description become empty strings so the vectorizer never
# receives NaN.
data['content'] = data['content'].fillna('')

# Build TF-IDF features over the problem descriptions, discarding
# scikit-learn's built-in English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['content'])

# (number of documents, number of vocabulary terms)
tfidf_matrix.shape

(874, 2858)

In [15]:
# Peek at one raw 'content' value (row label 1) to see what the vectorizer is fed.
data.loc[1, 'content']

"['Given', 'two', 'strings', 'shortest', 'string', 'multiple', 'answers', 'may', 'return', 'string', 'S', 'subsequence', 'string', 'T', 'deleting', 'number', 'characters', 'T', 'characters', 'chosen', 'anywhere', 'results', 'string', 'Example', 'substring', 'delete', 'first', 'substring', 'delete', 'last', 'The', 'answer', 'provided', 'shortest', 'string', 'satisfies', 'consist', 'lowercase', 'English']"

In [6]:
# Pairwise similarity between every pair of rows of the TF-IDF matrix.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse lookup: problem name -> row label in `data`.
# Series.drop_duplicates() compares the *values* (row labels), not the index,
# so it never removes a repeated name. Deduplicate on the index instead,
# keeping the first occurrence, so that a lookup by name yields one scalar.
idx = pd.Series(data.index, index=data['name'])
idx = idx[~idx.index.duplicated(keep='first')]

# `indices` was built by an identical expression; it is kept as a separate
# object with the same contents for any code that still refers to that name.
indices = idx.copy()

In [7]:
# Preview the first three entries of the name -> row lookup.
idx.head(3)

name
Shortest Path in Binary Matrix    0
Shortest Common Supersequence     1
Largest Values From Labels        2
dtype: int64

In [8]:
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the names of the problems most similar to ``title``.

    Parameters
    ----------
    title : str
        Problem name to look up in the module-level ``idx`` mapping.
    cosine_sim : array-like, optional
        Square similarity matrix whose row/column ``i`` corresponds to row
        position ``i`` of ``data``. The default is bound when this function
        is defined, i.e. to the matrix that existed at that point.
    top_n : int, optional
        Number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        The ``name`` values of the ``top_n`` most similar rows, most similar
        first. The query row itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``idx``.
    """
    # A repeated name makes the lookup return a Series rather than a scalar;
    # fall back to the first matching row so the similarity row stays 1-D.
    match = idx[title]
    if isinstance(match, pd.Series):
        match = match.iloc[0]
    query_idx = int(match)

    # Drop the query row by position instead of assuming it sorts first:
    # that assumption breaks when an earlier row ties for the top score or
    # when the query has an all-zero vector (empty content).
    scored = [
        (row, score)
        for row, score in enumerate(cosine_sim[query_idx])
        if row != query_idx
    ]
    # list.sort is stable, so tied scores keep their original row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    best_rows = [row for row, _ in scored[:top_n]]
    return data['name'].iloc[best_rows]

In [9]:
# Spot-check the recommender on a cache-design problem.
get_recommendations(title='LRU Cache')

514                       LFU Cache
540        All O`one Data Structure
274                  Design HashMap
380                   Map Sum Pairs
581    Insert Delete GetRandom O(1)
275                  Design HashSet
690    Implement Trie (Prefix Tree)
459     Convert BST to Greater Tree
83       Time Based Key-Value Store
343                 My Calendar III
Name: name, dtype: object

In [10]:
# Reverse check: query the closely related 'LFU Cache' problem.
get_recommendations(title='LFU Cache')

728                       LRU Cache
540        All O`one Data Structure
274                  Design HashMap
380                   Map Sum Pairs
581    Insert Delete GetRandom O(1)
275                  Design HashSet
459     Convert BST to Greater Tree
690    Implement Trie (Prefix Tree)
83       Time Based Key-Value Store
343                 My Calendar III
Name: name, dtype: object

In [13]:
# Spot-check on a linked-list / sorting problem.
get_recommendations(title='Merge k Sorted Lists')

853                       Merge Two Sorted Lists
87                     Squares of a Sorted Array
786                           Merge Sorted Array
791           Remove Duplicates from Sorted List
529                           Add Two Numbers II
765    Convert Sorted List to Binary Search Tree
792        Remove Duplicates from Sorted List II
872                              Add Two Numbers
254                           Design Linked List
855             Remove Nth Node From End of List
Name: name, dtype: object

In [17]:
# Spot-check on a matrix-traversal problem.
get_recommendations(title='Spiral Matrix')

486                        Diagonal Traverse
815                         Spiral Matrix II
310                          Toeplitz Matrix
826                             Rotate Image
440                       Reshape the Matrix
455                                01 Matrix
593    Max Sum of Rectangle No Larger Than K
800                       Search a 2D Matrix
631           Range Sum Query 2D - Immutable
677                           Maximal Square
Name: name, dtype: object

In [19]:
# Spot-check on a string-decoding problem.
get_recommendations(title='Decode Ways')

410                                 Decode Ways II
185                        Decoded String at Index
576                                    Mini Parser
567                                  Decode String
650                                 Missing Number
148                           Reverse Only Letters
854                              Valid Parentheses
677                                 Maximal Square
657              Different Ways to Add Parentheses
687    Add and Search Word - Data structure design
Name: name, dtype: object