In [112]:
import pandas as pd
import ast
# Import TfIdfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [113]:
# Load the two LeetCode CSVs from ../data: `data` holds the processed problem
# table, `test` holds the labels table.
data, test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (
        '../data/leetcode_data_processed_topics2.csv',
        '../data/leetcode_labels.csv',
    )
)

In [114]:
# Integer id of the first row in the labels table whose name is 'LRU Cache'
test.loc[test['name'] == 'LRU Cache', 'id'].to_numpy().astype(int)[0]

146

In [115]:
# Show the row(s) of the problem table carrying that id
data.loc[data['id'] == 146]

Unnamed: 0,id,name,content,difficulty
728,146,LRU Cache,"['design', 'implement', 'data', 'structure', '...",2


In [116]:
# Turn the string cells of `similar_questions` into Python objects with
# ast.literal_eval. Only str cells are parsed, so re-running this cell after
# the column has already been converted is a no-op instead of an error;
# non-string cells are passed through unchanged.
test['similar_questions'] = test['similar_questions'].apply(
    lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
)

In [118]:
def get_recommendations(title, cosine_sim=None, threshold=0.3, verbose=True):
    """Return the names of rows whose similarity to `title` exceeds `threshold`.

    Parameters
    ----------
    title : str
        Name to look up in the module-level `idx` Series.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix, indexed by the row numbers stored in `idx`.
        When omitted, the module-level `cosine_sim` is read at *call* time, so
        this function can be defined before the matrix is built and always
        sees the most recently computed matrix.
    threshold : float, default 0.3
        Only scores strictly greater than this value are kept.
    verbose : bool, default True
        When True, print the retained ``(row, score)`` pairs.

    Returns
    -------
    list
        Values of ``data['name']`` for the retained rows, highest score first.

    Raises
    ------
    KeyError
        If `title` is not present in `idx`.
    NameError
        If no matrix is passed and no module-level `cosine_sim` exists.
    """
    if cosine_sim is None:
        try:
            cosine_sim = globals()['cosine_sim']
        except KeyError:
            raise NameError(
                "cosine_sim is not defined; compute it first or pass it explicitly"
            ) from None

    query_idx = idx[title]
    # A repeated name makes idx[title] a Series rather than a scalar;
    # fall back to its first entry so the row lookup below stays 1-D.
    if hasattr(query_idx, 'iloc'):
        query_idx = query_idx.iloc[0]

    # Rank every row by its similarity to the query row, best first.
    ranked = sorted(
        enumerate(cosine_sim[query_idx]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Exclude the query row by its index: dropping "the first element" is
    # wrong whenever another row ties with (or outranks) the query itself.
    sim_scores = [
        (row, score)
        for row, score in ranked
        if row != query_idx and score > threshold
    ]
    if verbose:
        print(sim_scores)

    res_idx = [row for row, _ in sim_scores]
    return data['name'].iloc[res_idx].tolist()

In [119]:
# Sanity check: sets sharing an element (2) are not disjoint
{1, 2, 3, 4}.isdisjoint([5, 2, 7])

False

In [120]:
def is_correct(predictions, actual):
    """Return True when `predictions` and `actual` share at least one element.

    `predictions` is materialised as a set; `actual` may be any iterable of
    hashable items. Two empty inputs have no overlap and yield False.
    """
    predicted = set(predictions)
    return any(item in predicted for item in actual)

In [129]:
# Row counts of the labels table and the problem table, in that order
test.shape[0], data.shape[0]

(485, 874)

In [121]:
# Replace missing `content` with the empty string first, then fit a TF-IDF
# model (English stop words removed) on the column; the last line displays
# the resulting matrix shape.
data['content'] = data['content'].fillna('')
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['content'])
tfidf_matrix.shape

(874, 2871)

In [123]:
# Pairwise similarity between all rows of the TF-IDF matrix. linear_kernel is
# a plain dot product; it equals cosine similarity here provided the rows are
# L2-normalised (TfidfVectorizer's default `norm='l2'` — left unchanged above).
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse map: problem name -> row label in `data`.
# Series.drop_duplicates() compares the *values* (the row labels), so it never
# removed a repeated name; de-duplicate on the index instead, keeping the first
# occurrence, so that idx[name] is always a single scalar.
idx = pd.Series(data.index, index=data['name'])
idx = idx[~idx.index.duplicated(keep='first')]
# `indices` was an identical second copy; kept as an alias for any cell that
# still refers to it.
indices = idx

In [124]:
# Peek at the first three name -> row entries
idx.head(3)

name
Shortest Path in Binary Matrix    0
Shortest Common Supersequence     1
Largest Values From Labels        2
dtype: int64

In [140]:
# Compute recommender accuracy: a labelled problem counts as correct when at
# least one recommended name also appears in its `similar_questions` list.

correct = 0
total = len(test)

for _, row in test.iterrows():
    question = row['name']
    predictions = get_recommendations(question)
    # Use this row's own labels; re-filtering `test` by name on every
    # iteration was quadratic and always picked the first row of a repeated name.
    actual = row['similar_questions']
    if is_correct(actual, predictions):
        correct += 1

# The cell used to end in a bare `print`, which reports nothing in Python 3.
accuracy = correct / total if total else 0.0
print(f'accuracy: {correct}/{total} = {accuracy:.3f}')

Uncrossed Lines
Coloring A Border
Max Consecutive Ones III
Check If Word Is Valid After Substitutions
Find Common Characters
Grid Illumination
Minimum Cost to Merge Stones
Maximum Binary Tree II
Find the Town Judge
Number of Squareful Arrays
Minimum Number of K Consecutive Bit Flips
Rotting Oranges
Cousins in Binary Tree
Subarrays with K Different Integers
Broken Calculator
Add to Array-Form of Integer
Smallest String Starting From Leaf
Interval List Intersections
Minimum Cost For Tickets
Unique Paths III
Distribute Coins in Binary Tree
Longest Turbulent Subarray
Squares of a Sorted Array
Largest Perimeter Triangle
Subarray Sums Divisible by K
K Closest Points to Origin
Fibonacci Number
Binary Tree Cameras
Generate Random Point in a Circle
Random Point in Non-overlapping Rectangles
Random Pick with Weight
Koko Eating Bananas
Length of Longest Fibonacci Subsequence
Random Pick with Blacklist
Exam Room
K-Similar Strings
Peak Index in a Mountain Array
Maximize Distance to Closest Person
S

In [127]:
# End-to-end check on one problem: do the recommendations for 'LRU Cache'
# overlap with the similar questions recorded for it in the labels table?
lru = get_recommendations('LRU Cache')
real_rec = test.loc[test['name'] == 'LRU Cache', 'similar_questions'].iloc[0]
is_correct(real_rec, lru)

[(514, 0.8824297696245826), (540, 0.5044195853652891), (274, 0.3624936760324875), (380, 0.3047336093508198)]


True

In [49]:
# Spot check: a query that returns a long list of matches above the threshold
get_recommendations(title='Clone Graph')

[(304, 0.5213717918415742), (45, 0.5161533866980255), (524, 0.5078429714941819), (193, 0.5060583132978923), (205, 0.50590288771497), (36, 0.4960356737870932), (292, 0.4883379519294247), (77, 0.4882137945665164), (736, 0.4856426905190308), (168, 0.4577659503223319), (750, 0.43971029816976254), (260, 0.4244157021945956), (76, 0.42410136913108093), (661, 0.42243786425054497), (171, 0.39570101799859514), (290, 0.3612584630571734), (278, 0.3579812458577154), (137, 0.3419415716160851), (615, 0.337483906920178), (254, 0.3318936282012278), (655, 0.33108996447145117), (763, 0.32915280952351017), (208, 0.3240867894783395), (66, 0.31816205901664557), (770, 0.3115148626416547), (25, 0.3085540592180413), (272, 0.3082564908426478), (303, 0.30806601248342913), (152, 0.3056094344617413), (141, 0.30419009290592164), (71, 0.30398040334396637), (443, 0.30142598648340185)]


['Search in a Binary Search Tree',
 'Next Greater Node In Linked List',
 'Delete Node in a BST',
 'Middle of the Linked List',
 'Smallest Subtree with all the Deepest Nodes',
 'Recover a Tree From Preorder Traversal',
 'Minimum Distance Between BST Nodes',
 'Vertical Order Traversal of a Binary Tree',
 'Copy List with Random Pointer',
 'Increasing Order Search Tree',
 'Binary Tree Maximum Path Sum',
 'Binary Tree Pruning',
 'Smallest String Starting From Leaf',
 'Delete Node in a Linked List',
 'All Possible Full Binary Trees',
 'Is Graph Bipartite?',
 'All Paths From Source to Target',
 'Minimize Malware Spread II',
 'Odd Even Linked List',
 'Design Linked List',
 'Binary Tree Paths',
 'Minimum Depth of Binary Tree',
 'All Nodes Distance K in Binary Tree',
 'Maximum Binary Tree II',
 'Maximum Depth of Binary Tree',
 'Binary Search Tree to Greater Sum Tree',
 'Find Eventual Safe States',
 'Insert into a Binary Search Tree',
 'Cat and Mouse',
 'Minimize Malware Spread',
 'Cousins in Bin

In [50]:
# Spot check (with topic-tag)
get_recommendations(title='Word Ladder')

[(748, 0.976150580330179)]


['Word Ladder II']

In [51]:
# Spot check (with topic-tag): nothing scores above the threshold here
get_recommendations(title='Minimum Genetic Mutation')

[]


[]

In [52]:
# Spot check
get_recommendations(title='Decode Ways')

[(410, 0.6584319806172336), (185, 0.5247086207255156)]


['Decode Ways II', 'Decoded String at Index']

In [53]:
# Spot check
get_recommendations(title='Two Sum')

[(713, 0.66256420987432), (679, 0.44462072400763003), (299, 0.41592689777618674), (858, 0.3872313463838287), (840, 0.36803766108798236), (841, 0.35016395569231873), (856, 0.334000169160927), (632, 0.32132597439105204), (793, 0.3118163756407788)]


['Two Sum II - Input array is sorted',
 'Contains Duplicate II',
 'Binary Search',
 '3Sum Closest',
 'Find First and Last Position of Element in Sorted Array',
 'Search in Rotated Sorted Array',
 '4Sum',
 'Range Sum Query - Immutable',
 'Search in Rotated Sorted Array II']

In [54]:
# Spot check
get_recommendations(title='01 Matrix')

[(35, 0.5081588887579545), (486, 0.3842566845728877), (354, 0.3562310725380605), (820, 0.34826750532698403), (310, 0.307367968363666)]


['Matrix Cells in Distance Order',
 'Diagonal Traverse',
 'Find K-th Smallest Pair Distance',
 'Spiral Matrix',
 'Toeplitz Matrix']