In [19]:
import pandas as pd
# Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [20]:
# Load the preprocessed LeetCode problem table from CSV.
# Later cells read its 'name' and 'content' columns.
data = pd.read_csv('../data/leetcode_data_processed_topics3.csv', low_memory=False)

In [21]:
# Swap missing problem text for an empty string so no NaN reaches the vectorizer.
data['content'] = data['content'].fillna('')

# Fit TF-IDF on the problem text, dropping scikit-learn's built-in English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['content'])

# One row per problem, one column per vocabulary term.
tfidf_matrix.shape

(874, 2871)

In [24]:
# Inspect the fitted vocabulary: a dict mapping each term to its column index
# in tfidf_matrix. NOTE(review): this displays the whole mapping, which is large.
tfidf.vocabulary_

{'square': 2397,
 'cell': 343,
 'blocked': 227,
 'length': 1371,
 'composed': 467,
 'cells': 344,
 'adjacent': 39,
 'connected': 485,
 'different': 690,
 'edge': 779,
 'location': 1428,
 'value': 2745,
 'located': 1427,
 'return': 2137,
 'shortest': 2291,
 'clear': 399,
 'path': 1770,
 'breadth': 257,
 'search': 2229,
 'strings': 2457,
 'string': 2456,
 'multiple': 1583,
 'answers': 91,
 'subsequence': 2482,
 'deleting': 636,
 'number': 1640,
 'characters': 361,
 'chosen': 380,
 'results': 2133,
 'substring': 2487,
 'delete': 633,
 'answer': 89,
 'provided': 1945,
 'satisfies': 2213,
 'consist': 497,
 'lowercase': 1457,
 'english': 821,
 'dynamic': 771,
 'programming': 1933,
 'set': 2273,
 'item': 1288,
 'label': 1329,
 'subset': 2485,
 'items': 1289,
 'largest': 1347,
 'possible': 1869,
 'sum': 2498,
 'values': 2747,
 'labels': 1333,
 'fifth': 943,
 'fourth': 1011,
 'hash': 1114,
 'table': 2533,
 'greedy': 1081,
 'fixed': 964,
 'arr': 118,
 'duplicate': 767,
 'occurrence': 1669,
 'shi

In [23]:
# Peek at the text of the row labelled 1.
data.loc[1, 'content']

'two strings shortest string multiple answers may return string subsequence string deleting number characters characters chosen anywhere results string substring delete first substring delete last answer provided shortest string satisfies consist lowercase english dynamic programming'

In [5]:
# Pairwise similarity between every pair of problems.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse lookup: problem name -> row of `data` (and of `cosine_sim`).
idx = pd.Series(data.index, index=data['name'])
# Keep only the first row for a repeated name so that idx[name] is always a
# single integer. (Series.drop_duplicates() would de-duplicate the *values*,
# i.e. the row labels, which are already unique, and leave repeated names in.)
idx = idx[~idx.index.duplicated(keep='first')]
# Same lookup under its older name, kept for any code that still uses it.
indices = idx

In [6]:
# Show the first three name -> row entries of the lookup.
idx.head(3)

name
Shortest Path in Binary Matrix    0
Shortest Common Supersequence     1
Largest Values From Labels        2
dtype: int64

In [7]:
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the names of the problems most similar to ``title``.

    Parameters
    ----------
    title : str
        Problem name, looked up in the notebook-level ``idx`` Series.
    cosine_sim : array-like, optional
        Square pairwise-similarity matrix; row ``i`` holds the similarity of
        problem ``i`` to every problem. Defaults to the matrix that existed
        when this function was defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Entries of ``data['name']`` for the most similar problems, most
        similar first. The queried problem itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``idx``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Row of the queried problem.
    query_idx = idx[title]
    # A repeated name makes the lookup return a Series; use its first match.
    if isinstance(query_idx, pd.Series):
        query_idx = query_idx.iloc[0]

    # Pair every other row with its similarity to the query. The query is
    # removed explicitly rather than by dropping the top-ranked entry: with
    # tied scores (identical or empty text) the query is not guaranteed to
    # sort first.
    sim_scores = [
        (row, score)
        for row, score in enumerate(cosine_sim[query_idx])
        if row != query_idx
    ]
    # Highest similarity first; sorted() is stable, so ties keep row order.
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    res_idx = [row for row, _ in sim_scores[:top_n]]

    return data['name'].iloc[res_idx]

In [8]:
# Spot-check: problems recommended for 'LRU Cache'.
get_recommendations('LRU Cache')

514                       LFU Cache
540        All O`one Data Structure
274                  Design HashMap
380                   Map Sum Pairs
275                  Design HashSet
581    Insert Delete GetRandom O(1)
690    Implement Trie (Prefix Tree)
642                Peeking Iterator
459     Convert BST to Greater Tree
719                       Min Stack
Name: name, dtype: object

In [9]:
# Spot-check: problems recommended for 'LFU Cache'.
get_recommendations('LFU Cache')

728                       LRU Cache
540        All O`one Data Structure
274                  Design HashMap
380                   Map Sum Pairs
275                  Design HashSet
581    Insert Delete GetRandom O(1)
690    Implement Trie (Prefix Tree)
459     Convert BST to Greater Tree
719                       Min Stack
83       Time Based Key-Value Store
Name: name, dtype: object

In [10]:
# Spot-check: problems recommended for 'Merge k Sorted Lists'.
get_recommendations('Merge k Sorted Lists')

853                   Merge Two Sorted Lists
791       Remove Duplicates from Sorted List
855         Remove Nth Node From End of List
254                       Design Linked List
782                   Reverse Linked List II
792    Remove Duplicates from Sorted List II
257                   Linked List Components
529                       Add Two Numbers II
872                          Add Two Numbers
661             Delete Node in a Linked List
Name: name, dtype: object

In [11]:
# Spot-check: problems recommended for 'Spiral Matrix'.
get_recommendations('Spiral Matrix')

486                        Diagonal Traverse
815                         Spiral Matrix II
310                          Toeplitz Matrix
826                             Rotate Image
440                       Reshape the Matrix
593    Max Sum of Rectangle No Larger Than K
800                       Search a 2D Matrix
631           Range Sum Query 2D - Immutable
455                                01 Matrix
160                     Sort Array By Parity
Name: name, dtype: object

In [12]:
# Spot-check: problems recommended for 'Decode Ways'.
get_recommendations('Decode Ways')

410                            Decode Ways II
185                   Decoded String at Index
576                               Mini Parser
677                            Maximal Square
122             Find the Shortest Superstring
148                      Reverse Only Letters
508    Unique Substrings in Wraparound String
854                         Valid Parentheses
789                         Maximal Rectangle
567                             Decode String
Name: name, dtype: object

In [13]:
# Spot-check: problems recommended for 'Two Sum'.
get_recommendations('Two Sum')

713                   Two Sum II - Input array is sorted
679                                Contains Duplicate II
299                                        Binary Search
858                                         3Sum Closest
840    Find First and Last Position of Element in Sor...
841                       Search in Rotated Sorted Array
856                                                 4Sum
632                          Range Sum Query - Immutable
793                    Search in Rotated Sorted Array II
600                              Top K Frequent Elements
Name: name, dtype: object

In [15]:
# Spot-check: problems recommended for '01 Matrix'.
get_recommendations('01 Matrix')

35            Matrix Cells in Distance Order
486                        Diagonal Traverse
354         Find K-th Smallest Pair Distance
820                            Spiral Matrix
310                          Toeplitz Matrix
826                             Rotate Image
0             Shortest Path in Binary Matrix
593    Max Sum of Rectangle No Larger Than K
800                       Search a 2D Matrix
763             Minimum Depth of Binary Tree
Name: name, dtype: object