In [1]:
import pandas as pd
# Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [19]:
# Load Movies Metadata
data = pd.read_csv('../data/leetcode_data_processed_synset2.csv', low_memory=False)

In [20]:
data['content'] = data['content'].astype(str)
data['content'] = data['content'].apply(lambda x: x.replace('[','').replace(']',''))

In [21]:
def synset_tokenizer(doc):
    return doc.split(',')

In [22]:
sfidf = TfidfVectorizer(tokenizer = synset_tokenizer)
sfidf_matrix = sfidf.fit_transform(data['content'])
sfidf_matrix.shape

(874, 11327)

In [23]:
# Compute the cosine similarity matrix
cosine_sim = linear_kernel(sfidf_matrix, sfidf_matrix)

#Construct a reverse map of indices and movie titles
indices = pd.Series(data.index, index=data['name']).drop_duplicates()
idx = pd.Series(data.index, index=data['name']).drop_duplicates()

In [10]:
idx[:3]

name
Shortest Path in Binary Matrix    0
Shortest Common Supersequence     1
Largest Values From Labels        2
dtype: int64

In [11]:
def get_recommendations(title, cosine_sim=cosine_sim):
    # get the idx of the movie that matches the title
    movie_idx = idx[title]
    # get the pairwise similarity scores with that movie
    sim_scores = list(enumerate(cosine_sim[movie_idx]))
    # sort
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    res_idx = [i[0] for i in sim_scores]
    
    return data['name'].iloc[res_idx]

In [12]:
get_recommendations('LRU Cache')

514                       LFU Cache
540        All O`one Data Structure
380                   Map Sum Pairs
274                  Design HashMap
459     Convert BST to Greater Tree
524            Delete Node in a BST
83       Time Based Key-Value Store
230                  Keys and Rooms
130                   Knight Dialer
581    Insert Delete GetRandom O(1)
Name: name, dtype: object

In [13]:
get_recommendations('LFU Cache')

728                          LRU Cache
540           All O`one Data Structure
380                      Map Sum Pairs
274                     Design HashMap
459        Convert BST to Greater Tree
524               Delete Node in a BST
230                     Keys and Rooms
130                      Knight Dialer
83          Time Based Key-Value Store
484    Find Mode in Binary Search Tree
Name: name, dtype: object

In [14]:
get_recommendations('Merge k Sorted Lists')

853                       Merge Two Sorted Lists
855             Remove Nth Node From End of List
254                           Design Linked List
727                          Insertion Sort List
12                              Distant Barcodes
32                          Stream of Characters
87                     Squares of a Sorted Array
301              Kth Largest Element in a Stream
765    Convert Sorted List to Binary Search Tree
257                       Linked List Components
Name: name, dtype: object

In [15]:
get_recommendations('Spiral Matrix')

486                          Diagonal Traverse
815                           Spiral Matrix II
310                            Toeplitz Matrix
582    Kth Smallest Element in a Sorted Matrix
826                               Rotate Image
593      Max Sum of Rectangle No Larger Than K
301            Kth Largest Element in a Stream
160                       Sort Array By Parity
440                         Reshape the Matrix
455                                  01 Matrix
Name: name, dtype: object

In [16]:
get_recommendations('Decode Ways')

410                            Decode Ways II
185                   Decoded String at Index
656                             Valid Anagram
498                            Magical String
148                      Reverse Only Letters
122             Find the Shortest Superstring
322                     Special Binary String
508    Unique Substrings in Wraparound String
759                     Distinct Subsequences
657         Different Ways to Add Parentheses
Name: name, dtype: object

In [17]:
get_recommendations('Two Sum')

713    Two Sum II - Input array is sorted
839                Search Insert Position
299                         Binary Search
690          Implement Trie (Prefix Tree)
708           Binary Search Tree Iterator
301       Kth Largest Element in a Stream
32                   Stream of Characters
574    First Unique Character in a String
563                     Random Pick Index
160                  Sort Array By Parity
Name: name, dtype: object

In [18]:
get_recommendations('01 Matrix')

35               Matrix Cells in Distance Order
354            Find K-th Smallest Pair Distance
486                           Diagonal Traverse
593       Max Sum of Rectangle No Larger Than K
820                               Spiral Matrix
11     Number of Submatrices That Sum to Target
310                             Toeplitz Matrix
501                      Total Hamming Distance
557                          Sum of Left Leaves
800                          Search a 2D Matrix
Name: name, dtype: object