In [1]:
import pandas as pd
import numpy as np

# Gzipped TSV of Amazon US Electronics reviews, read straight from S3.
url = 'https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Electronics_v1_00.tsv.gz'

# Rows with the wrong number of fields are skipped with a warning instead of
# aborting the load. `error_bad_lines` was deprecated in pandas 1.3 and removed
# in 2.0; `on_bad_lines='warn'` is its replacement. An unknown keyword raises
# TypeError before any data is fetched, so the fallback for older pandas does
# not download the file twice.
try:
    url_data = pd.read_table(url, sep='\t', nrows=809815, on_bad_lines='warn')
except TypeError:
    url_data = pd.read_table(url, sep='\t', nrows=809815, error_bad_lines=False)

b'Skipping line 9076: expected 15 fields, saw 22\nSkipping line 19256: expected 15 fields, saw 22\nSkipping line 24313: expected 15 fields, saw 22\nSkipping line 47211: expected 15 fields, saw 22\nSkipping line 54295: expected 15 fields, saw 22\nSkipping line 56641: expected 15 fields, saw 22\nSkipping line 63067: expected 15 fields, saw 22\n'
b'Skipping line 93796: expected 15 fields, saw 22\n'
b'Skipping line 132806: expected 15 fields, saw 22\nSkipping line 164631: expected 15 fields, saw 22\nSkipping line 167019: expected 15 fields, saw 22\nSkipping line 167212: expected 15 fields, saw 22\n'
b'Skipping line 198103: expected 15 fields, saw 22\nSkipping line 199191: expected 15 fields, saw 22\nSkipping line 202841: expected 15 fields, saw 22\nSkipping line 218228: expected 15 fields, saw 22\nSkipping line 235900: expected 15 fields, saw 22\n'
b'Skipping line 277761: expected 15 fields, saw 22\nSkipping line 304582: expected 15 fields, saw 22\nSkipping line 312029: expected 15 fields,

In [2]:
# Re-index the reviews by product title: search_item filters on
# url_product.index.str and get_filtered_chart groups by 'product_title'.
url_product = url_data.set_index('product_title')

In [3]:
def weighted_rank(df):
    """Rank products by a blend of average rating and review volume.

    Adds four columns to ``df`` **in place** (callers rely on this):

    * ``count_rank``  - rank of ``'# Of Reviews'`` (1 = most reviewed)
    * ``rating_rank`` - rank of ``'Avg Rating'`` (1 = best rated)
    * ``rank_score``  - ``Avg Rating / 5`` plus a volume bonus
    * ``rank``        - rank of ``rank_score`` (1 = best)

    The volume bonus is ``(total - count_rank) / total``. Products whose review
    count is below ``mean + std`` of all review counts get that bonus scaled
    down in proportion to how far below the threshold they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Avg Rating'`` and ``'# Of Reviews'``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience.
    """
    review_counts = df['# Of Reviews']
    avg_rating = df['Avg Rating']

    total_count = review_counts.count()

    # The sample standard deviation is NaN when there are fewer than two
    # products; a NaN threshold would turn every score and rank into NaN.
    # Treat the spread as zero so a lone product still gets ranked.
    spread = review_counts.std()
    if np.isnan(spread):
        spread = 0.0
    threshold = review_counts.mean() + spread

    df['count_rank'] = review_counts.rank(ascending=False)
    df['rating_rank'] = avg_rating.rank(ascending=False)

    rating_part = avg_rating / 5
    full_bonus = (total_count - df['count_rank']) / total_count
    # Below the threshold the bonus is damped by how far short the count falls.
    damped_bonus = ((1 - ((threshold - review_counts) / threshold))
                    * (total_count - df['count_rank']) / total_count)

    df['rank_score'] = np.where(review_counts >= threshold,
                                rating_part + full_bonus,
                                rating_part + damped_bonus)
    df['rank'] = df['rank_score'].rank(ascending=False)
    return df

In [4]:
def get_filtered_chart(df, item_per_page_number=10):
    """Summarise reviews per product and return the top-ranked products.

    Groups ``df`` by ``'product_title'``, computes the mean and count of
    ``'star_rating'`` per product, ranks the products with ``weighted_rank``
    and returns the first ``item_per_page_number`` rows ordered by rank.

    Parameters
    ----------
    df : pandas.DataFrame
        Review rows with a ``'star_rating'`` column, groupable by
        ``'product_title'``.
    item_per_page_number : int, default 10
        Number of products to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Avg Rating'``, ``'# Of Reviews'`` and ``'rank'``, best rank
        first.
    """
    per_product = df.groupby('product_title')['star_rating'].agg(['mean', 'count'])
    per_product = per_product.rename(
        columns={'mean': 'Avg Rating', 'count': '# Of Reviews'}
    )

    # weighted_rank adds the 'rank' column to the frame in place.
    weighted_rank(per_product)

    shown_columns = ['Avg Rating', '# Of Reviews', 'rank']
    ranked = per_product[shown_columns].sort_values('rank', ascending=True)
    return ranked.head(n=item_per_page_number)

In [5]:
def search_item(user_search_term, user_review_term = ''):
    """Return the top-ranked products whose title matches a search phrase.

    The phrase is tokenized, stop words are dropped, and a product matches when
    its title contains any remaining token (case-insensitive). Titles that
    mention a peripheral (cable, case, mount, ...) are excluded unless the user
    searched for that peripheral. If ``user_review_term`` is given, only reviews
    whose body contains that text are kept.

    Parameters
    ----------
    user_search_term : str
        Free-text product query.
    user_review_term : str, default ''
        Optional text that must appear in the review body. Matched literally.

    Returns
    -------
    pandas.DataFrame
        The result of ``get_filtered_chart`` on the matching review rows.
    """
    import difflib
    import re

    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    # Tokenize the query. Tokens without any letter or digit (stray
    # punctuation) are dropped so they cannot match almost every title.
    words = [word for word in word_tokenize(user_search_term)
             if any(ch.isalnum() for ch in word)]

    # Load the stop-word list once (all languages, as before) instead of once
    # per token, and compare case-insensitively like the title search does.
    stop_words = set(stopwords.words())
    words_list = [word for word in words if word.lower() not in stop_words]
    if not words_list:
        # A query made only of stop words: search for those words rather than
        # using an empty pattern, which would match every product.
        words_list = words
    if not words_list:
        # Nothing searchable at all: return an empty chart.
        return get_filtered_chart(url_product.iloc[0:0])

    # Escape each token so regex metacharacters in user input ("c++", "(")
    # are matched literally instead of raising or mis-matching.
    search = '|'.join(re.escape(word) for word in words_list)

    # Filter out peripheral products from the search query, unless the user
    # asked for that peripheral. Tokens are lower-cased first: "Cable" vs
    # "cable" scores 0.8, which is below the 0.85 cutoff.
    peripheral_terms = ['cable', 'cord', 'case', 'cover', 'sleeve', 'mount', 'stand',
                        'wire', 'cloth', 'battery', 'holder', 'lens', 'adapter']
    for word in words_list:
        match = difflib.get_close_matches(word.lower(), peripheral_terms, n=1, cutoff=0.85)
        if match:
            peripheral_terms.remove(match[0])

    # Build one boolean row mask from the individual conditions.
    titles = url_product.index.str
    keep = np.asarray(titles.contains(search, na=False, case=False), dtype=bool)

    if peripheral_terms:
        peripheral_search = '|'.join(peripheral_terms)
        is_peripheral = np.asarray(
            titles.contains(peripheral_search, na=False, case=False), dtype=bool)
        keep = keep & ~is_peripheral

    if user_review_term:
        in_review = url_product['review_body'].str.contains(
            re.escape(user_review_term), na=False, case=False)
        keep = keep & np.asarray(in_review, dtype=bool)

    return get_filtered_chart(url_product[keep])

In [21]:
# Install pandastable; the GUI cell imports Table and TableModel from it.
!pip install pandastable

Collecting pandastable
[?25l  Downloading https://files.pythonhosted.org/packages/c9/9d/e4941f037bb6eb7169e50169f4fa783bce0c3e71e6f0af6e711fc4030239/pandastable-0.11.0.tar.gz (234kB)
[K    100% |████████████████████████████████| 235kB 6.2MB/s ta 0:00:01
Collecting future (from pandastable)
[?25l  Downloading https://files.pythonhosted.org/packages/90/52/e20466b85000a181e1e144fd8305caf2cf475e2f9674e797b222f8105f5f/future-0.17.1.tar.gz (829kB)
[K    100% |████████████████████████████████| 829kB 7.5MB/s eta 0:00:01
Building wheels for collected packages: pandastable, future
  Running setup.py bdist_wheel for pandastable ... [?25ldone
[?25h  Stored in directory: /Users/zhen/Library/Caches/pip/wheels/cd/ee/d0/9c6c73b74230f8a3be27801a5c9337c4ec2c12ec88ebd0a446
  Running setup.py bdist_wheel for future ... [?25ldone
[?25h  Stored in directory: /Users/zhen/Library/Caches/pip/wheels/0c/61/d2/d6b7317325828fbb39ee6ad559dbe4664d0896da4721bf379e
Successfully built pandastable future
[31mdi

In [37]:
from tkinter import *
from PIL import ImageTk, Image
from pandastable import Table, TableModel

class search():
    """Tkinter window for searching products and showing the ranked results.

    The window contains, top to bottom: a logo, a product search box with
    "Search" and "New Search" buttons, a label showing the keyword used by the
    last search, a keyword entry box, and a text area with the results returned
    by ``search_item``.

    Constructing an instance builds the window and runs the Tk main loop, so
    ``search()`` does not return until the window is closed.
    """

    # Placeholder texts shown in the entry boxes until the user types.
    PRODUCT_PLACEHOLDER = 'Product...'
    KEYWORD_PLACEHOLDER = 'Keyword...'

    CANVAS_WIDTH = 1080
    CANVAS_HEIGHT = 800

    def __init__(self):
        # Use a real root window. A bare Toplevel() makes tkinter create a
        # second, empty default root that keeps the main loop alive.
        root = Tk()
        root.lift()
        root.attributes("-topmost", True)
        root.geometry("1080x800")
        root.title("Amazeng")
        self.root = root

        self.canvas = Canvas(root, height=self.CANVAS_HEIGHT, width=self.CANVAS_WIDTH)

        # The frames are packed in this order; keep it to preserve the layout.
        self._build_logo()
        self._build_product_search()
        self._build_keyword_display()
        self._build_keyword_search()
        self._build_results()

        self.canvas.pack()
        root.mainloop()

    def _build_logo(self):
        """Create the logo banner at the top of the window."""
        self.logo_frame = Frame(self.root, width=self.CANVAS_WIDTH, height=150)
        self.logo_frame.pack(fill=X, expand=True)

        logo_file = Image.open('main_logo_nobackground.png')
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        logo_file = logo_file.resize((800, 250), Image.LANCZOS)
        # Keep a reference on the instance: Tk shows a blank image if the
        # PhotoImage object is garbage-collected.
        self.photo_logo = ImageTk.PhotoImage(logo_file, master=self.root)
        self.logo_label = Label(self.logo_frame, image=self.photo_logo)
        self.logo_label.pack(fill=X, padx=10, pady=(5, 0))

    def _build_product_search(self):
        """Create the product entry, both buttons and the message label."""
        # TODO: change the text button to a logo image.
        self.p_search_frame = Frame(self.root, width=self.CANVAS_WIDTH, height=100)
        self.p_search_frame.pack(fill=X, expand=True)

        # Each widget below is packed and then placed, as in the original
        # layout; the place() call decides the final position.
        self.p_search = Entry(self.p_search_frame, bd=2, justify=LEFT, relief=GROOVE)
        self.p_search.insert(END, self.PRODUCT_PLACEHOLDER)
        self.p_search.bind("<Button-1>", self._clear_placeholder)
        self.p_search.pack(side='left')
        self.p_search.place(width=500, height=30, relx=0.49, rely=0.3, anchor=CENTER)
        self.p_search.focus_set()  # send cursor to product search

        self.p_button = Button(self.p_search_frame, text="Search", width=8,
                               command=self.searching)
        self.p_button.pack(side='left')
        self.p_button.place(relx=0.8, rely=0.3, anchor=CENTER)

        self.msg_var = StringVar(master=self.root)
        self.msg_label = Label(self.p_search_frame, textvariable=self.msg_var)
        self.msg_label.pack(side='bottom')
        self.msg_label.place(relx=0.5, rely=0.75, anchor=CENTER)

        self.new_button = Button(self.p_search_frame, text="New Search", width=10,
                                 command=self.check_product)
        self.new_button.pack(side='right')
        self.new_button.place(relx=0.8, rely=0.9, anchor=CENTER)

    def _build_keyword_display(self):
        """Create the label that shows the keyword used by the last search."""
        # TODO: align keywords in columns, allow deleting a keyword, and
        # keep a list of keywords.
        self.key_frame = Frame(self.root, width=self.CANVAS_WIDTH, height=150)
        self.key_frame.pack(fill=X, expand=True, pady=(0, 5))
        self.klabel = Label(self.key_frame, text='Keyword')
        self.klabel.pack(side='top')
        self.cur_var = StringVar(master=self.root)
        self.key_label = Label(self.key_frame, textvariable=self.cur_var)
        self.key_label.pack(side='top')

    def _build_keyword_search(self):
        """Create the keyword entry box."""
        self.k_search_frame = Frame(self.root, width=self.CANVAS_WIDTH, height=50)
        self.k_search_frame.pack(fill=X, expand=True)

        self.k_search = Entry(self.k_search_frame, bd=2, justify=LEFT, relief=GROOVE)
        self.k_search.insert(END, self.KEYWORD_PLACEHOLDER)
        self.k_search.bind("<Button-1>", self._clear_placeholder)
        self.k_search.pack(side='left')
        self.k_search.place(width=400, height=30, relx=0.49, rely=0.3, anchor=CENTER)
        # The product box always holds its placeholder at this point, so the
        # original `if self.p_search.get()` guard was always true.
        self.k_search.config(bg='white', state=NORMAL)

    def _build_results(self):
        """Create the text area that displays the ranked products."""
        self.result_frame = Frame(self.root, width=self.CANVAS_WIDTH, height=300)
        self.result_frame.pack(side='bottom', fill=X, expand=True, pady=(0, 20))
        self.rank_text = Text(self.result_frame)
        self.rank_text.pack(side='top', fill=X, expand=True, padx=5)

    def _clear_placeholder(self, event):
        """Clear an entry on click only while it still shows placeholder text.

        Text the user has typed is left alone, so clicking back into the box
        to edit a query does not erase it.
        """
        if event.widget.get() in (self.PRODUCT_PLACEHOLDER, self.KEYWORD_PLACEHOLDER):
            event.widget.delete(0, END)

    def check_product(self):
        """Reset the form for a new search.

        Restores both placeholders, clears the displayed keyword and message,
        and empties the result area.
        """
        self.p_search.delete(0, END)
        self.p_search.insert(END, self.PRODUCT_PLACEHOLDER)
        self.k_search.delete(0, END)
        self.k_search.insert(END, self.KEYWORD_PLACEHOLDER)
        # The original destroyed `self.cur_check`, which is never created and
        # raised AttributeError before the results could be cleared.
        self.cur_var.set('')
        self.msg_var.set('')
        self.rank_text.delete('1.0', END)

    def searching(self):
        """Run ``search_item`` on the current input and display the result.

        Shows "Please enter a product name" when the product box is empty or
        still holds its placeholder. Otherwise searches by product and, when a
        keyword was entered, also by keyword; the result replaces whatever the
        result area showed before.
        """
        product = self.p_search.get()
        if len(product) == 0 or product == self.PRODUCT_PLACEHOLDER:
            self.msg_var.set("Please enter a product name")
            return

        self.msg_var.set('')
        self.k_search.config(state='normal')

        keyword = self.k_search.get()
        if len(keyword) == 0 or keyword == self.KEYWORD_PLACEHOLDER:
            self.cur_var.set('')  # no keyword in effect for this search
            df = search_item(product)
        else:
            self.cur_var.set(keyword)
            df = search_item(product, keyword)

        # Replace, rather than append to, the previous results.
        self.rank_text.delete('1.0', END)
        self.rank_text.insert(END, str(df.iloc[:10, :5]))

In [38]:
# Open the search window. The constructor runs the Tk main loop itself, so this
# cell keeps running until that loop exits.
project = search()

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
