In [7]:
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
import torch
import os
from sklearn.preprocessing import StandardScaler
from tkinter import ttk

In [8]:
# The tokenizer and the encoder must come from the same pretrained checkpoint,
# so the checkpoint name is written once and shared by both loaders.
BERT_CHECKPOINT = 'bert-base-uncased'

tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert = BertModel.from_pretrained(BERT_CHECKPOINT)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
def scale_data(data_dir=None):
    """Fit a ``StandardScaler`` on the saved BERT vectors and return it.

    Every regular, non-hidden file found in ``<data_dir>/vectors`` is read
    with ``torch.load``; the loaded tensors are concatenated along dim 0 and
    used to fit the scaler. Only the vectors influence the fitted scaler, so
    the ``labels`` directory is not read and does not need to exist.

    Parameters
    ----------
    data_dir : str or os.PathLike, optional
        Directory that contains the ``vectors`` sub-directory. Defaults to
        the current working directory.

    Returns
    -------
    StandardScaler
        Scaler fitted on all loaded vectors.

    Raises
    ------
    FileNotFoundError
        If the ``vectors`` directory does not exist (raised by ``os.listdir``).
    RuntimeError
        If the ``vectors`` directory contains no loadable files.
    """
    if data_dir is None:
        data_dir = os.getcwd()
    vector_dir = os.path.join(data_dir, 'vectors')

    # os.listdir returns entries in arbitrary order; sorting makes the fit
    # reproducible. Hidden entries and sub-directories (e.g. the
    # ``.ipynb_checkpoints`` folder Jupyter creates) are not tensor files.
    vector_paths = []
    for filename in sorted(os.listdir(vector_dir)):
        file_path = os.path.join(vector_dir, filename)
        if filename.startswith('.') or not os.path.isfile(file_path):
            continue
        vector_paths.append(file_path)

    if not vector_paths:
        raise RuntimeError(f"No vector files found in {vector_dir!r}")

    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code. Only point this at vector files you created yourself.
    # map_location='cpu' lets vectors that were saved from a GPU be loaded on
    # a CPU-only machine and keeps them convertible to NumPy below.
    vector_list = [
        torch.load(file_path, map_location='cpu').detach()
        for file_path in vector_paths
    ]

    # Stack the per-file BERT outputs into one float tensor
    final_pool = torch.cat(vector_list).float()

    # Fit the scaler to the pooled vectors
    scaler = StandardScaler()
    scaler.fit(final_pool.numpy())

    return scaler

In [10]:
from tkinter import *
from PIL import ImageTk, Image
import torch

import nbimporter
#import NewModel as NM

# Root window of the application
master = Tk()

# Read the image file from disk
image = Image.open("yelp.png")

# Resize the image to 100x100 pixels using the resize() method
resize_image = image.resize((100, 100))

# Wrap the resized image in a PhotoImage so tkinter can display it
img = ImageTk.PhotoImage(resize_image)

# Create a label showing the resized image. No parent is passed here, so
# tkinter attaches the label to its default root window. An extra reference
# to the PhotoImage is stored on the label (the usual guard against the image
# being garbage-collected while it is displayed).
label1 = Label(image=img)
label1.image = img
label1.grid(row=0)

# Add the "Enter Review" prompt and the text entry the review is read from
Label(master, text='Enter Review: ').grid(row=1)
e1 = Entry(master)
e1.grid(row=2, column=0)

#function to create pop up message window
def popupmsg(msg):
    """Show ``msg`` in a small pop-up window with an "Okay" button.

    The pop-up is a ``Toplevel`` window attached to the application's
    existing root, and the function returns as soon as the window has been
    created; the already-running event loop services it.

    Parameters
    ----------
    msg : str
        Text displayed in the pop-up.
    """
    # Creating a second Tk() root and calling mainloop() on it from inside a
    # button callback nests event loops: the nested loop keeps running while
    # the main window exists, so the callback does not return and every click
    # nests one level deeper. A Toplevel needs no event loop of its own.
    popup = Toplevel()
    popup.wm_title("!")

    label = ttk.Label(popup, text=msg)
    label.pack(side="top", fill="x", pady=10)

    # The button closes only the pop-up, not the main window
    B1 = ttk.Button(popup, text="Okay", command=popup.destroy)
    B1.pack()

# Fit the feature scaler once, before the GUI starts handling clicks;
# buttonClick() reuses it to scale the BERT vector of every review.
data_scaler = scale_data()

# Classifier heads already loaded, keyed by weights path, so the weights are
# read from disk once instead of on every button click.
_classifier_cache = {}


def _load_classifier(weights_path="model.pth"):
    """Return the classifier head in eval mode with weights from ``weights_path``.

    The network is built and its state dict loaded on first use only; later
    calls with the same path return the cached module. Re-running the cell
    resets the cache and therefore reloads the weights.
    """
    if weights_path not in _classifier_cache:
        # Layer order must match the saved state dict (keys are the indices
        # of the Linear layers inside the Sequential).
        model = torch.nn.Sequential(
            torch.nn.Linear(768, 800),
            torch.nn.SiLU(),
            torch.nn.Linear(800, 64),
            torch.nn.SiLU(),
            torch.nn.Linear(64, 64),
            torch.nn.SiLU(),
            torch.nn.Linear(64, 64),
            torch.nn.SiLU(),
            torch.nn.Linear(64, 64),
            torch.nn.SiLU(),
            torch.nn.Linear(64, 64),
            torch.nn.SiLU(),
            torch.nn.Linear(64, 16),
            torch.nn.SiLU(),
            torch.nn.Linear(16, 1),
        )

        # NOTE(security): torch.load unpickles the file; only load weights
        # you trust. map_location='cpu' allows weights saved from a GPU to be
        # loaded on a CPU-only machine.
        model.load_state_dict(torch.load(weights_path, map_location="cpu"))

        # Evaluation mode: this head is used for inference only
        model.eval()
        _classifier_cache[weights_path] = model

    return _classifier_cache[weights_path]


def buttonClick():
    """Classify the review typed into the entry box and show the verdict.

    Reads the text from ``e1``, encodes it with BERT, scales the pooled BERT
    vector with ``data_scaler``, runs the classifier head and opens a pop-up
    saying whether the review is likely negative or positive. A blank entry
    produces a prompt to enter a review instead of a prediction.
    """
    user_review = e1.get()

    # Nothing to classify: tell the user rather than scoring an empty string
    if not user_review.strip():
        popupmsg('Please enter a review first')
        return

    model = _load_classifier()

    # Inference only, so no autograd graph is needed for BERT or the head
    with torch.no_grad():
        # Tokenize once; truncate so reviews longer than BERT's position
        # limit do not crash the forward pass. Result has shape (1, n_tokens).
        user_review_token_ids = tokenizer.encode(
            user_review,
            add_special_tokens=True,
            truncation=True,
            max_length=bert.config.max_position_embeddings,
            return_tensors='pt',
        )

        # Single unpadded sequence, so every token is attended to
        attention_mask = torch.ones_like(user_review_token_ids)

        # Pass input IDs and attention mask through the BERT model
        bert_output = bert(user_review_token_ids, attention_mask=attention_mask)

        # Element 1 of the BERT output is the pooled sentence vector.
        # Scale it with the scaler fitted on the saved vectors.
        scaled_bert_vector = data_scaler.transform(bert_output[1].numpy())
        scaled_bert_tensor = torch.from_numpy(scaled_bert_vector).float()

        # Probability from the classifier head, as a plain Python float
        probability = torch.sigmoid(model(scaled_bert_tensor)).item()

    if probability < 0.5:
        verdict = 'Negative review likely'
    else:
        verdict = 'Positive review likely'

    popupmsg(verdict)
    
        
# Set up the tkinter button; clicking it runs buttonClick()
button = Button(master, text="Enter", command=buttonClick)
button.grid(row=3, column=0)

# Start the tkinter event loop; this call blocks until the main window is closed
master.mainloop()