<a href="https://colab.research.google.com/github/kvamsi7/mscs/blob/mscs_nn_prj/CS5720-Neural%20Network%20and%20Deep%20Learning/NN_Final_Project/ML_approach.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

### Load Data

In [None]:
# Locations of the two raw review files (first: tab-separated, second: comma-separated)
dataset_path_1 = '/content/Restaurant_Reviews.tsv'
dataset_path_2 = '/content/Restaurant reviews.csv'

In [None]:
# The first file is tab-separated; from the second only the review text and its rating are read.
data_set_1 = pd.read_csv(dataset_path_1, sep='\t')
data_set_2 = pd.read_csv(dataset_path_2, usecols=['Review', 'Rating'])

### Data Cleaning and Preprocessing

In [None]:
import re
import nltk
from nltk.corpus import stopwords,wordnet
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from string import punctuation

In [None]:
# Keep only the rows in which no column is missing
data_set_2 = data_set_2.dropna(axis=0, how='any')

In [None]:
# Distribution of the raw rating values
data_set_2['Rating'].value_counts()

Rating
5       3826
4       2373
1       1735
3       1192
2        684
4.5       69
3.5       47
2.5       19
1.5        9
Like       1
Name: count, dtype: int64

In [None]:
# Remove the row(s) whose rating is the literal text 'Like' instead of a number
like_rows = data_set_2.index[data_set_2['Rating'] == 'Like']
data_set_2 = data_set_2.drop(index=like_rows)

In [None]:
# Ratings were read as text; convert them to numbers (any non-numeric value raises)
data_set_2['Rating'] = pd.to_numeric(data_set_2['Rating'], errors='raise')

In [None]:
# analysis on the label

# create a feature with categorical reviews
def format_rating(rating, threshold=3):
    """Map a numeric rating to a binary label.

    Parameters
    ----------
    rating : int or float
        Rating of a single review.
    threshold : int or float, default 3
        Smallest rating that is labelled 1.

    Returns
    -------
    int or None
        1 when ``rating >= threshold``, 0 when ``rating < threshold`` and
        None when neither comparison holds (e.g. NaN).
    """
    if rating >= threshold:
        return 1
    if rating < threshold:
        return 0
    # NaN compares False both ways; make the "no label" result explicit
    # instead of silently falling off the end of the function.
    return None

# Binary label for every review, derived from its rating
category = data_set_2.Rating.apply(format_rating)


In [None]:
# Store the label as the smallest integer dtype that fits, in a new 'Liked' column
data_set_2 = data_set_2.assign(Liked=pd.to_numeric(category, downcast='integer'))

In [None]:
# The numeric rating is no longer needed once 'Liked' exists
data_set_2 = data_set_2.drop(columns='Rating')

In [None]:
# Stack both sources row-wise and renumber the rows from 0
data_set = pd.concat((data_set_1, data_set_2), axis=0, ignore_index=True)

In [None]:
# Snapshot of the combined data set, written before the emoji handling below
data_set.to_csv('data_set_c.csv')

In [None]:
# Compiled once instead of on every call.
# NOTE(review): the last range (U+24C2-U+1F251) spans far more than emoji; it also
# covers e.g. CJK ideographs, kana and Hangul, so text in those scripts matches too.
_EMOJI_PATTERN = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F700-\U0001F77F"  # alchemical symbols
    u"\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    u"\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    u"\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    u"\U0001FA00-\U0001FA6F"  # Chess Symbols
    u"\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    u"\U00002702-\U000027B0"  # Dingbats
    u"\U000024C2-\U0001F251"  # broad catch-all range (see note above)
    "]+",
    flags=re.UNICODE,
)


def contains_emoji(text):
    """Return True when ``text`` contains at least one character matched by the emoji pattern.

    Parameters
    ----------
    text : str
        Text to inspect. Any non-string value (e.g. NaN or None for a
        missing review) is reported as containing no emoji.

    Returns
    -------
    bool
    """
    if not isinstance(text, str):
        return False
    return _EMOJI_PATTERN.search(text) is not None

# Reviews for which contains_emoji reports a match
has_emoji = data_set['Review'].apply(contains_emoji)
emojis_reviews = data_set.loc[has_emoji, 'Review']

In [None]:
# Number of reviews flagged by contains_emoji
emojis_reviews.shape

(708,)

In [None]:
# remove the emoji from the text

# Compiled once instead of on every call.
# NOTE(review): the last range (U+24C2-U+1F251) spans far more than emoji; it also
# covers e.g. CJK ideographs, kana and Hangul, so text in those scripts is removed too.
_EMOJI_PATTERN = re.compile(
    "["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F700-\U0001F77F"  # alchemical symbols
    u"\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    u"\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    u"\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    u"\U0001FA00-\U0001FA6F"  # Chess Symbols
    u"\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    u"\U00002702-\U000027B0"  # Dingbats
    u"\U000024C2-\U0001F251"  # broad catch-all range (see note above)
    "]+",
    flags=re.UNICODE,
)


def remove_emojis(text):
    """Delete every run of characters matched by the emoji pattern.

    Matched characters are replaced by the empty string, so words attached
    to an emoji are kept and no extra spaces are inserted.

    Parameters
    ----------
    text : str
        Text to clean. Any non-string value (e.g. NaN or None for a
        missing review) is returned unchanged.

    Returns
    -------
    str
        ``text`` without the matched characters.
    """
    if not isinstance(text, str):
        return text
    return _EMOJI_PATTERN.sub('', text)

# Replace the review text with its emoji-free version
data_set = data_set.assign(Review=data_set['Review'].apply(remove_emojis))

In [None]:
# save the data
# to_csv also writes the row index by default, so the file gains an unnamed leading column.

data_set.to_csv("cleaned_rest_review.csv")

In [None]:
# loading saved data
# Reads back the file written by the previous cell.
data_set = pd.read_csv("cleaned_rest_review.csv")

In [None]:
# Drop rows whose review text is missing and renumber the remaining rows from 0.
# drop=True discards the old index instead of inserting it as an extra column,
# so re-running this cell does not accumulate index columns.
data_set = data_set[data_set['Review'].notna()].reset_index(drop=True)

In [None]:
def get_simple_pos(tag):
    """Translate a POS tag string into one of the WordNet POS constants.

    The first letter of ``tag`` decides the result: J -> adjective,
    V -> verb, N -> noun, R -> adverb. Any other tag is treated as a noun.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, simple_pos in prefix_to_pos:
        if tag.startswith(prefix):
            return simple_pos
    # noun is the fallback for every other tag
    return wordnet.NOUN

In [None]:
lemmatizer = WordNetLemmatizer()
# English stopwords plus every single punctuation character
stop_words = {*stopwords.words('english'), *punctuation}

def clean_review(words):
    """Lemmatize the tokens of one review, skipping stopwords.

    ``words`` is an iterable of tokens. Tokens whose lower-cased form is in
    ``stop_words`` are skipped; every other token is lemmatized with the
    WordNet POS derived from its own tag. Returns the lower-cased lemmas
    as a list, in input order.
    """
    kept = []
    for token in words:
        if token.lower() in stop_words:
            continue
        # pos_tag expects a list of words; a bare string would be tagged
        # character by character. Each result is a (word, tag) tuple.
        tag = pos_tag([token])[0][1]
        lemma = lemmatizer.lemmatize(token, get_simple_pos(tag))
        kept.append(lemma.lower())
    return kept

In [None]:
# Tools and stopword set used when building the corpus
ps = PorterStemmer()
lemmatizer = WordNetLemmatizer()

# 'not' is taken out of the stopword list so that it is not filtered from the reviews
english_stopwords = stopwords.words('english')
english_stopwords.remove('not')
# remaining stopwords plus every single punctuation character
all_stopwords = {*english_stopwords, *punctuation}

In [None]:
# an array to append all the cleaned text as corpus

def get_corpus(data):
    """Turn raw review texts into cleaned, lemmatized strings.

    Every non-letter character of a review is replaced by a space, the text
    is lower-cased and split on whitespace, words found in ``all_stopwords``
    are dropped and the remaining words are lemmatized using the POS tag
    obtained for each word on its own.

    Parameters
    ----------
    data : iterable of str
        Review texts (list, pandas Series, ...), processed in iteration order.

    Returns
    -------
    list of str
        One space-joined string of lemmas per input review.
    """
    non_letters = re.compile(r'[^a-zA-Z]')
    # Words are tagged in isolation, so the lemma depends on the word alone;
    # remembering it avoids repeating the tagging for every occurrence.
    lemma_cache = {}

    corpus = []
    # Iterate over the values themselves: data[i] is a label lookup on a
    # Series and fails when its index is not exactly 0..n-1.
    for text in data:
        lemmas = []
        for word in non_letters.sub(' ', text).lower().split():
            if word in all_stopwords:
                continue
            lemma = lemma_cache.get(word)
            if lemma is None:
                tag = pos_tag([word])[0][1]  # pos_tag returns (word, tag) tuples
                lemma = lemmatizer.lemmatize(word, get_simple_pos(tag))
                lemma_cache[word] = lemma
            lemmas.append(lemma)
        corpus.append(' '.join(lemmas))
    return corpus

In [None]:
# One cleaned, lemmatized string per review
corpus = get_corpus(data_set['Review'])

### Data Transformation

In [None]:
# Bag-of-words features: counts of 1- to 3-word n-grams, capped at 5000 features

from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(ngram_range=(1, 3), max_features=5000)

In [None]:
# Learn the vocabulary from the corpus and build the dense count matrix
bow_matrix = cv.fit_transform(corpus)
X = bow_matrix.toarray()
# Target labels as a NumPy array
y = data_set['Liked'].to_numpy()

In [None]:
# saving bow dictionary

import pickle
bow_path = 'bow_sentiment_model.pkl'
# The context manager closes (and thereby flushes) the file even if pickling fails.
with open(bow_path, 'wb') as bow_file:
    pickle.dump(cv, bow_file)

In [None]:
# split the train and test set

from sklearn.model_selection import train_test_split

# 80/20 split; the fixed seed keeps the partition the same on every run
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.2, random_state=23)

### Model Building

In [None]:
# using naive bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
# NOTE(review): GaussianNB models every feature as a continuous Gaussian; for word-count
# features MultinomialNB is the usual choice — consider comparing the two.
nb_classifier = GaussianNB()

In [None]:
# Train on the training split only; the test split is kept for evaluation
nb_classifier.fit(X_train,y_train)

### Performance Evaluation

In [None]:
from sklearn.metrics import confusion_matrix,accuracy_score

In [None]:
# Naive Bayes predictions for the held-out split
y_pred = nb_classifier.predict(X_test)

# rows: true label, columns: predicted label
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

accuracy_score(y_true=y_test, y_pred=y_pred)

[[ 506  117]
 [ 540 1027]]


0.7

In [None]:
### SVC
from sklearn.svm import SVC

# RBF-kernel support vector classifier with C=100
svc = SVC(kernel='rbf', C=100)
svc.fit(X_train, y_train)

# accuracy on the held-out split
svc.score(X_test, y_test)

0.8579908675799087

In [None]:
### Random Forest

from sklearn.ensemble import RandomForestClassifier

# A fixed seed (same value as the train/test split) makes the forest, and
# therefore the reported score, reproducible across runs.
rfc = RandomForestClassifier(n_jobs = -1, random_state = 23)

rfc.fit(X_train,y_train)
rfc.score(X_test,y_test)

0.8803652968036529

In [None]:
# Save the trained random forest (rfc) so it can be reloaded for future predictions.
# NOTE(review): the file name ends in "61_ac" although the score shown for rfc is about 0.88,
# and the prediction cell below uses `svc` rather than this saved model — confirm which is intended.
import joblib

joblib.dump(rfc,'rfc_sentiment_classifier_model_61_ac')

['rfc_sentiment_classifier_model_61_ac']

In [None]:
review_test_p = ["I recently had the pleasure of dining here, and it was an experience that exceeded all my expectations. From the moment we walked in, the ambiance set the stage for what was to be a memorable evening. The staff greeted us warmly, ensuring we felt welcomed and valued.The menu selection was impressive, offering a variety of dishes that catered to all preferences, including several innovative options for those with dietary restrictions. Each dish we ordered was a testament to the chef's expertise and passion for culinary excellence. The flavors were balanced perfectly, with each ingredient shining through without overpowering the others.What truly set this place apart was the attention to detail. The presentation of the food was artistic, the timing between courses was impeccable, and the staff went above and beyond to accommodate our requests, making us feel truly special.I cannot recommend this place enough. Whether you’re looking for a place to celebrate a special occasion or just in search of a delightful dining experience, this should be at the top of your list. We’re already looking forward to our next visit!"]
review_test_n = ["Unfortunately, my recent visit to the restaurant left much to be desired. Despite the high expectations set by its reputation, the experience was underwhelming from start to finish. Upon arrival, the greeting was lukewarm, and it took a noticeable amount of time before we were seated, despite having reservations.The menu, while extensive, seemed to lack coherence, and the descriptions did little to entice the palate or clarify what one might expect from each dish. When our orders finally arrived, the presentation was lackluster, and the flavors were surprisingly bland. A particular disappointment was the main course, which was not only overcooked but also arrived lukewarm, suggesting it had been sitting out for some time.Service throughout the evening was inconsistent; our server seemed disinterested and was seldom seen. Attempts to address our concerns about the meal were met with indifference, leaving us feeling unvalued as customers.Given the price point and the establishment's reputation, I expected a dining experience that delighted the senses and showcased culinary excellence. Unfortunately, what I encountered was a forgettable meal paired with service that failed to meet even basic standards of hospitality. It's unlikely I'll return or recommend this restaurant to others based on this visit."]
review_test_neu = ["My recent visit to the restaurant was a mixed experience. Walking in, the ambiance of the place was inviting, with a nicely decorated interior that promised a cozy dining atmosphere. The staff greeted us politely and seated us without delay, which was a good start to the evening.The menu presented a wide array of options, ranging from traditional favorites to some intriguing chef specials. It took some time to make our selections, partly due to the variety and partly because the menu descriptions could have been more detailed.When the food arrived, the presentation was decent, and the portions were generous. Some of the dishes we tried were quite satisfying, offering a good balance of flavors and freshness. However, a few items fell short of expectations, lacking the depth of flavor we anticipated. It was a hit or miss on the culinary front.Service was generally efficient, though it lacked the warmth and attentiveness that elevate a dining experience from good to great. Our server was courteous but seemed rushed, making our interactions feel somewhat transactional.The overall value for the money was fair, considering the portion sizes and the quality of the ingredients used. However, the inconsistency in the food and service left us feeling that while the restaurant has potential, there's room for improvement in execution and attention to detail.In conclusion, while the visit didn't fully meet our expectations, it wasn't a disappointing experience either. For those considering dining here, there might be dishes that delight, but I'd recommend managing your expectations when it comes to service and some menu items."]

In [None]:
# Clean the sample review with the same function used for the training corpus
corp_test_sample = get_corpus(review_test_p)

In [None]:
# Vectorize with the already fitted CountVectorizer (transform only, no refit)
x_new_test = cv.transform(corp_test_sample).toarray()

In [None]:
# Predict the label of the sample review with the SVC.
# NOTE(review): the model saved with joblib is `rfc`, while this cell predicts with `svc` — confirm.
y_pred_new =  svc.predict(x_new_test)
y_pred_new

array([1])

In [None]:
# Inspect the dense count vector of the sample review
x_new_test

array([[0, 0, 0, ..., 0, 0, 0]])