In [3]:
import pandas as pd
import numpy as np

url = 'https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Electronics_v1_00.tsv.gz'
url_data = pd.read_table(url, sep='\t', nrows=809815, error_bad_lines=False)

b'Skipping line 9076: expected 15 fields, saw 22\nSkipping line 19256: expected 15 fields, saw 22\nSkipping line 24313: expected 15 fields, saw 22\nSkipping line 47211: expected 15 fields, saw 22\nSkipping line 54295: expected 15 fields, saw 22\nSkipping line 56641: expected 15 fields, saw 22\nSkipping line 63067: expected 15 fields, saw 22\n'
b'Skipping line 93796: expected 15 fields, saw 22\n'
b'Skipping line 132806: expected 15 fields, saw 22\nSkipping line 164631: expected 15 fields, saw 22\nSkipping line 167019: expected 15 fields, saw 22\nSkipping line 167212: expected 15 fields, saw 22\n'
b'Skipping line 198103: expected 15 fields, saw 22\nSkipping line 199191: expected 15 fields, saw 22\nSkipping line 202841: expected 15 fields, saw 22\nSkipping line 218228: expected 15 fields, saw 22\nSkipping line 235900: expected 15 fields, saw 22\n'
b'Skipping line 277761: expected 15 fields, saw 22\nSkipping line 304582: expected 15 fields, saw 22\nSkipping line 312029: expected 15 fields,

In [4]:
url_product = url_data.set_index('product_title')

In [5]:
def weighted_rank(df):
    total_count = df['# Of Reviews'].count()
    threshold = df['# Of Reviews'].mean() + df['# Of Reviews'].std()
    df['count_rank'] = df['# Of Reviews'].rank(ascending=False)
    df['rating_rank'] = df['Avg Rating'].rank(ascending=False)
    df['rank_score'] = np.where(df['# Of Reviews'] >= threshold,
                                       df['Avg Rating']/5 + (total_count-df['count_rank'])/total_count,
                                       (df['Avg Rating']/5 + ((1-((threshold-df['# Of Reviews'])/threshold))*(total_count-df['count_rank'])/total_count)))
    df['rank'] = df['rank_score'].rank(ascending=False)

In [6]:
def get_filtered_chart(df, item_per_page_number=10):
    filtered_df = df.groupby('product_title')['star_rating'].agg(['mean', 'count'])
    filtered_df.columns = ['Avg Rating', '# Of Reviews']
    weighted_rank(filtered_df)
    filtered_df = filtered_df[['Avg Rating', '# Of Reviews', 'rank']].sort_values('rank', ascending=True)
    return filtered_df.head(n=item_per_page_number)

In [None]:
def search_item(user_search_term):
    
    from nltk.tokenize import word_tokenize
    from nltk.corpus import stopwords
    
    words = word_tokenize(user_search_term)
    words_list = [word for word in words
                      if word not in stopwords.words()]
    search_term = '|'.join(words_list)
    
    search_df = url_product[url_product.index.str.contains(search_term, na=False, case=False)]
    return get_filtered_chart(search_df)

In [9]:
from tkinter import *
from PIL import ImageTk, Image

class search():

    def __init__(self):

        root = Tk()
        root.lift()
        root.attributes("-topmost", True)
        root.geometry("800x800")
        root.title("Amazeng")
        self.root = root

        # Build GUI
        canvas_width = 800
        canvas_height = 800
        canvas = Canvas(root, height=canvas_height, width=canvas_width)
        self.canvas = canvas

        # Insert logo on top
        self.logo_frame = Frame(root, width=canvas_width, height=150)
        self.logo_frame.pack(fill=X,expand=True)
#         logo_file = Image.open('main_logo_nobackground_white.png')
#         logo_file = logo_file.resize((800,300),Image.ANTIALIAS)
#         photo_logo = ImageTk.PhotoImage(logo_file)

        logo_file = 'main_logo_nobackground_white.png'
        logo_img = PhotoImage(logo_file)
        self.logo_label = Label(self.logo_frame, image=logo_img)
        self.logo_label.pack(filel=X, expand=True)
        
        # Product Search Box: 
        #change text button to logo img
        #remove previous input when clicked
        def removeValue(event):
            event.widget.delete(0, END)
        # for more color effects http://effbot.org/tkinterbook/entry.htm
        self.p_search_frame = Frame(root, width=canvas_width, height=100)
        self.p_search_frame.pack(fill=X,expand=True)
        # set up product search entry
        self.p_search = Entry(self.p_search_frame,bd=2,justify=LEFT,relief=GROOVE)
        self.p_search.insert(END, 'Product...')
        self.p_search.bind("<Button-1>", removeValue)
        self.p_search.pack(side='left')
        self.p_search.place(width=400,height=30,relx=0.49, rely=0.3, anchor=CENTER)
        self.p_search.focus_set() #send cursor to product search
        
        self.p_button = Button(self.p_search_frame, text="Search", width=8, command=lambda:self.searching())
        self.p_button.pack(side='left')
        self.p_button.place(relx=0.8,rely=0.3,anchor=CENTER)
        
        self.msg_var=StringVar()
        self.msg_label = Label(self.p_search_frame,textvariable=self.msg_var)
        self.msg_label.pack(side='bottom')
        self.msg_label.place(relx=0.5,rely=0.75,anchor=CENTER)
        
        self.new_button = Button(self.p_search_frame, text="New Search",width=10,command=lambda:self.check_product())
        self.new_button.pack(side='right')
        self.new_button.place(relx=0.8,rely=0.9,anchor=CENTER)

        # Display Keyword with checkboxes
        #align keywords in columns, go to next col if too many
        #delete when clicked x
        #update keyword list
        self.key_frame = Frame(root, width=canvas_width, height=150)
        self.key_frame.pack(fill=X,expand=True)
        self.key_label = Label(self.key_frame,text='Keywords')
        self.key_label.pack(side='top',pady=(0,5))
        self.cur_var = StringVar()
        self.cur_check = Checkbutton(self.key_frame,textvariable=self.cur_var)
        self.cur_check.place(anchor=CENTER)
        # Keyword Search Box
        #same layout adjustment
        #no input until there's a product

        self.k_search_frame = Frame(root, width=canvas_width, height=50)
        self.k_search_frame.pack(fill=X,expand=True)
        
        self.k_search = Entry(self.k_search_frame,bd=2,justify=LEFT,relief=GROOVE)
        self.k_search.insert(END, 'Keyword...')
        self.k_search.config(state='disabled',disabledbackground='grey')
        self.p_search.bind("<Button-1>", removeValue)
        self.k_search.pack(side='left')
        self.k_search.place(width=320,height=30,relx=0.49, rely=0.3, anchor=CENTER)
        if self.p_search.get():
            self.k_search.config(bg='white',state=NORMAL) #enable keyword search after input for product
        self.cur_key = StringVar()
        self.cur_key.set(self.k_search.get()) #get current key
#         keys = []
#         keys.append(self.cur_key) #add current key to the list of keywords
        self.k_button = Button(self.k_search_frame, text="Search", width=8, command=lambda:self.searching())
        self.k_button.pack(side='left')
        self.k_button.place(relx=0.75,rely=0.3,anchor=CENTER)
#         if self.k_button.invoke():
                  
        # Display Result
        #adjust size to fit window
        self.result_frame = Frame(root, width=canvas_width, height=300)
        self.result_frame.pack(side='bottom', fill=X, expand=True,pady=(0,20))
        self.scrollbar = Scrollbar(self.result_frame, orient=VERTICAL)
        self.rank_text = Text(self.result_frame,bd=0,yscrollcommand=self.scrollbar.set)
        self.scrollbar.config(command=self.rank_text.yview)
        self.scrollbar.pack(side=RIGHT, fill=Y)
        self.rank_text.pack(side=LEFT, fill=BOTH, expand=1)

        root.mainloop()
            
        
    # clear output and print error message when no product search is given
    def check_product(self):
        self.p_search.insert(END, 'Product...')
        self.p_search.bind("<Button-1>", removeValue)
        self.k_search.insert(END, 'Keyword...')
        self.k_search.config(state='disabled',disabledbackground='grey')
        self.p_search.bind("<Button-1>", removeValue)
        self.cur_check.destroy()
        self.cur_key.set(None)
        self.rank_text.delete(0,END)

    def searching(self):
        df_string = str()
        if len(self.p_search.get()) == 0:
            self.msg_var.set("Please enter a product name")
        else:
            self.msg_var.set('')
            self.k_search.config(state='normal')
            if len(self.k_search.get()) == 0:
                df_string = str(search_item(self.product))
                self.rank.insert(END,df_string)
            else:
                self.cur_var.set(self.cur_key)
                df_string = str(search_item(self.product,self.cur_key))
                self.rank_text.insert(END,df_string)

In [10]:
project = search()

TclError: image "main_logo_nobackground_white.png" doesn't exist