In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
import text2emotion as te
import tkinter as tk

[nltk_data] Downloading package stopwords to /Users/Kenny/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/Kenny/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/Kenny/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
# Load the pre-processed per-movie adjective table. Later cells read its
# 'movieName', 'adjectives_merge' and 'adjectives_with_drop_merge' columns.
movie_adj = pd.read_csv(filepath_or_buffer='processed_data_adjs.csv')

In [3]:
# Cosine similarity on the 'adjectives_merge' column, i.e. the variant built
# without the "drop duplicate" step (presumably duplicate adjectives are kept
# -- confirm against the preprocessing notebook).

# Extra words to ignore in addition to NLTK's English stop words.
drop_list = [
    'much', 'first', 'many', 'non', 'human', 'last', 'main', 'sure', 'full',
    'anti', 'top', 'whole', 'little', 'able', 'true', 'hard', 'final', 'second',
    'entire', 'long', 'year', 'big', 'small', 'several', 'short', 'overall', 'next',
    'obvious', 'self', 'right', 'actual',
]
stop = [*stopwords.words('english'), *drop_list]

# min_df=3: terms appearing in fewer than three documents are left out of the vocabulary.
count = CountVectorizer(
    stop_words=stop,
    min_df=3,
)
count_matrix = count.fit_transform(movie_adj['adjectives_merge'])

# Pairwise similarity of every movie against every other movie (square matrix).
cosine_sim = cosine_similarity(X=count_matrix, Y=count_matrix)
cosine_sim

array([[1.        , 0.78976658, 0.81113612, ..., 0.60861495, 0.58581199,
        0.50476911],
       [0.78976658, 1.        , 0.80321151, ..., 0.53791928, 0.50209257,
        0.44469431],
       [0.81113612, 0.80321151, 1.        , ..., 0.43896995, 0.41631989,
        0.39158327],
       ...,
       [0.60861495, 0.53791928, 0.43896995, ..., 1.        , 0.60091842,
        0.51954654],
       [0.58581199, 0.50209257, 0.41631989, ..., 0.60091842, 1.        ,
        0.46830396],
       [0.50476911, 0.44469431, 0.39158327, ..., 0.51954654, 0.46830396,
        1.        ]])

In [4]:
# Same pipeline as the previous cell, but fitted on the
# 'adjectives_with_drop_merge' column (the variant produced with the
# "drop duplicate" step -- presumably duplicate adjectives removed; confirm
# against the preprocessing notebook).
# NOTE: this cell rebinds drop_list, stop, count and count_matrix.
drop_list = (
    ['much', 'first', 'many', 'non', 'human', 'last', 'main', 'sure', 'full']
    + ['anti', 'top', 'whole', 'little', 'able', 'true', 'hard', 'final', 'second']
    + ['entire', 'long', 'year', 'big', 'small', 'several', 'short', 'overall', 'next']
    + ['obvious', 'self', 'right', 'actual']
)
stop = [*stopwords.words('english'), *drop_list]

# Vocabulary excludes the stop words above and any term seen in fewer than 3 documents.
count = CountVectorizer(min_df=3, stop_words=stop)
count_matrix = count.fit_transform(movie_adj['adjectives_with_drop_merge'])

# Square movie-by-movie cosine similarity matrix for the "drop" variant.
cosine_sim_drop = cosine_similarity(X=count_matrix, Y=count_matrix)
cosine_sim_drop

array([[1.        , 0.43388979, 0.44694812, ..., 0.34728885, 0.37239952,
        0.34753817],
       [0.43388979, 1.        , 0.40859363, ..., 0.38503974, 0.33563847,
        0.35124315],
       [0.44694812, 0.40859363, 1.        , ..., 0.35625946, 0.35566367,
        0.33396451],
       ...,
       [0.34728885, 0.38503974, 0.35625946, ..., 1.        , 0.37300782,
        0.35075511],
       [0.37239952, 0.33563847, 0.35566367, ..., 0.37300782, 1.        ,
        0.35200375],
       [0.34753817, 0.35124315, 0.33396451, ..., 0.35075511, 0.35200375,
        1.        ]])

In [5]:
# Define the recommendation function with 3 inputs.
def recommend(title, recommend_number = 5, cosine_sim = cosine_sim):
    """Return the names of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie name, matched exactly against ``movie_adj['movieName']``.
        If several rows share the name, the first one is used.
    recommend_number : int, default 5
        Maximum number of names to return. Values below 1 give an empty list.
    cosine_sim : 2-D array-like
        Square similarity matrix whose row ``i`` is assumed to describe row
        ``i`` of ``movie_adj``. Defaults to the ``cosine_sim`` matrix that
        exists when this function is defined.

    Returns
    -------
    list
        Movie names ordered from most to least similar; the queried movie
        itself is never included.

    Raises
    ------
    IndexError
        If ``title`` is not present in ``movie_adj['movieName']`` (same
        exception type as before, now with an explanatory message).
    """
    # Work with positions 0..n-1 so they always line up with matrix rows,
    # whatever index the DataFrame happens to carry.
    titles = movie_adj['movieName'].reset_index(drop=True)

    matches = titles.index[titles == title]
    if len(matches) == 0:
        raise IndexError('movie title not found: {!r}'.format(title))
    idx = matches[0]

    # Remove the queried movie explicitly instead of assuming it sorts first;
    # another row can tie with it at similarity 1.0.
    score_series = pd.Series(cosine_sim[idx]).drop(idx).sort_values(ascending = False)

    # Clamp so a negative request cannot turn into a surprising slice.
    top_indices = score_series.index[:max(recommend_number, 0)]

    return [titles.iloc[i] for i in top_indices]

In [6]:
# Example query: six recommendations for 'Vertigo' using the "drop" similarity matrix.
recommend('Vertigo', recommend_number=6, cosine_sim=cosine_sim_drop)

['Rebecca',
 'The Secret in Their Eyes',
 'Rear Window',
 'No Country for Old Men',
 'The Prestige',
 'Psycho']

In [11]:
# Use the tkinter package to build a small desktop UI for the recommender.
window = tk.Tk()
window.title('Movie Recommend System')
window.geometry('500x400')

# NOTE: .place() returns None, so every widget is created first and placed in
# a separate statement; chaining the two would bind the name to None instead
# of the widget.

# --- Movie title input -------------------------------------------------------
enterboxlabel1 = tk.Label(window, text='Please enter the movie', font=('Arial', 14))
enterboxlabel1.place(x=10, y=20)
# Keep a Python reference to the variable so it is not garbage-collected
# while the Entry is still bound to it.
enterbox1_var = tk.StringVar()
enterbox1 = tk.Entry(window, font=('Arial', 14), textvariable=enterbox1_var)
enterbox1.place(x=260, y=20)

# --- Number-of-recommendations input -----------------------------------------
enterboxlabel2 = tk.Label(window, text='Please enter the number (max=6)', font=('Arial', 14))
enterboxlabel2.place(x=10, y=50)
enterbox2_var = tk.IntVar()
enterbox2 = tk.Entry(window, font=('Arial', 14), textvariable=enterbox2_var)
enterbox2.place(x=260, y=50)

# --- Result rows -------------------------------------------------------------
# Six identical blue labels stacked 40px apart starting at y=120, each backed
# by its own StringVar that the button callback writes into.
result_vars = [tk.StringVar() for _ in range(6)]
result_labels = []
for row, result_var in enumerate(result_vars):
    result_label = tk.Label(window, textvariable=result_var, bg='blue', fg='white',
                            font=('Arial', 15), width=40, height=2)
    result_label.place(x=10, y=120 + 40 * row)
    result_labels.append(result_label)

# Preserve the original module-level names used elsewhere in the notebook.
label0, label1, label2, label3, label4, label5 = result_vars
l0, l1, l2, l3, l4, l5 = result_labels

label_t = tk.Label(window, text='The predicted answer is: ', font=('Arial', 14))
label_t.place(x=10, y=80)

def hit():
    """Button callback: read both entry boxes and display the recommendations.

    Reads the movie name from ``enterbox1`` and the requested count from
    ``enterbox2``, calls ``recommend(name, number)`` and writes the results
    into ``label0`` .. ``label5`` as ``'<rank>. <<title>>'``.

    All six result labels are cleared first so titles from a previous query
    never linger. Invalid input (a non-integer or a count below 1) and an
    unknown movie title are reported in the first result label instead of
    raising inside the Tk callback. Counts above six are capped at six,
    the number of available result labels.
    """
    slots = (label0, label1, label2, label3, label4, label5)

    # Reset the display before doing anything that can fail.
    for slot in slots:
        slot.set('')

    name = str(enterbox1.get())

    try:
        number = int(enterbox2.get())
    except ValueError:
        # Empty or non-numeric text in the number box.
        label0.set('Please enter a whole number from 1 to 6')
        return
    if number < 1:
        label0.set('Please enter a whole number from 1 to 6')
        return
    # Never request more titles than there are labels to show them in.
    number = min(number, len(slots))

    try:
        pred = recommend(name,number)
    except LookupError:
        # recommend() signals an unknown title with an IndexError.
        label0.set('Movie not found: <'+name+'>')
        return

    # zip() stops at the shorter sequence, so fewer results than requested
    # simply leave the remaining labels blank.
    for rank, (slot, movie) in enumerate(zip(slots, pred), start=1):
        slot.set(str(rank)+'. <'+movie+'>')
        
# 'Recommend' runs the hit() callback defined above when clicked.
button = tk.Button(
    window,
    text='Recommend',
    font=('Arial', 14),
    width=10,
    height=1,
    command=hit,
)
button.place(x=210, y=80)

# 'Exit' calls window.quit when clicked.
buttonexit = tk.Button(
    window,
    text="Exit",
    font=('Arial', 14),
    command=window.quit,
)
buttonexit.place(x=400, y=300)

In [None]:
# Start the Tk event loop for the window built above. This call blocks the
# cell until the loop is stopped (the 'Exit' button calls window.quit).
window.mainloop()