In [1]:
# this notebook is inspired by @robikscube
#30-06-2022
#dataset amazon-fine-food-reviews

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from tqdm.notebook import tqdm

In [2]:
dataset=pd.read_csv("../input/amazon-fine-food-reviews/Reviews.csv")
dataset=dataset.head(100)
print(dataset.shape)

(100, 10)


In [3]:
dataset.head(10)

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...
5,6,B006K2ZZ7K,ADT0SRK1MGOEU,Twoapennything,0,0,4,1342051200,Nice Taffy,I got a wild hair for taffy and ordered this f...
6,7,B006K2ZZ7K,A1SP2KVKFXXRU1,David C. Sullivan,0,0,5,1340150400,Great! Just as good as the expensive brands!,This saltwater taffy had great flavors and was...
7,8,B006K2ZZ7K,A3JRGQVEQN31IQ,Pamela G. Williams,0,0,5,1336003200,"Wonderful, tasty taffy",This taffy is so good. It is very soft and ch...
8,9,B000E7L2R4,A1MZYO9TZK0BBI,R. James,1,1,5,1322006400,Yay Barley,Right now I'm mostly just sprouting this so my...
9,10,B00171APVA,A21BT40VZCCYT4,Carol A. Reed,0,0,5,1351209600,Healthy Dog Food,This is a very healthy dog food. Good for thei...


In [4]:
print(dataset["Text"][1])

Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo".


In [5]:
exp_sentence=dataset["Text"][1]
print(exp_sentence)

exp_tokens=nltk.word_tokenize(exp_sentence)
print(exp_tokens)

Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo".
['Product', 'arrived', 'labeled', 'as', 'Jumbo', 'Salted', 'Peanuts', '...', 'the', 'peanuts', 'were', 'actually', 'small', 'sized', 'unsalted', '.', 'Not', 'sure', 'if', 'this', 'was', 'an', 'error', 'or', 'if', 'the', 'vendor', 'intended', 'to', 'represent', 'the', 'product', 'as', '``', 'Jumbo', "''", '.']


In [6]:
nltk.pos_tag(exp_tokens[:10])

[('Product', 'NNP'),
 ('arrived', 'VBD'),
 ('labeled', 'VBN'),
 ('as', 'IN'),
 ('Jumbo', 'NNP'),
 ('Salted', 'NNP'),
 ('Peanuts', 'NNP'),
 ('...', ':'),
 ('the', 'DT'),
 ('peanuts', 'NNS')]

# Using Roberta Model for Sentiment Analysis

In [7]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

In [8]:
MODEL=f"cardiffnlp/twitter-roberta-base-sentiment"
tokenizer=AutoTokenizer.from_pretrained(MODEL)
model=AutoModelForSequenceClassification.from_pretrained(MODEL)

Downloading:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/476M [00:00<?, ?B/s]

In [9]:
print(exp_sentence)

Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo".


In [10]:
def spa_roberta(exp_sentence):
    # sentece polarity analysis Roberta
    encoded_text=tokenizer(exp_sentence,return_tensors="pt")
    output=model(**encoded_text)
    scores=output[0][0].detach().numpy()
    scores=softmax(scores)
    scores_dict={
    "roberta_neg":scores[0],
    "roberta_neu":scores[1],
    "roberta_pos":scores[2],
    }
    return scores_dict

In [11]:
print(spa_roberta(exp_sentence))

{'roberta_neg': 0.5089861, 'roberta_neu': 0.45241356, 'roberta_pos': 0.03860036}


In [12]:
final_score={}
for i, row in tqdm(dataset.iterrows(),total=len(dataset)):
    try:
        text=row["Text"]
        t_id=row["Id"]
        roberta_score=spa_roberta(text)
        final_score[t_id]=[roberta_score,text]
    except RuntimeError:
        print(f"Broke for id {t_id}")


  0%|          | 0/100 [00:00<?, ?it/s]

Broke for id 83


In [13]:
print(final_score[3])
print(final_score[4])
# This dictionary can be formatted to get proper labels

[{'roberta_neg': 0.0032288982, 'roberta_neu': 0.09806739, 'roberta_pos': 0.8987036}, 'This is a confection that has been around a few centuries.  It is a light, pillowy citrus gelatin with nuts - in this case Filberts. And it is cut into tiny squares and then liberally coated with powdered sugar.  And it is a tiny mouthful of heaven.  Not too chewy, and very flavorful.  I highly recommend this yummy treat.  If you are familiar with the story of C.S. Lewis\' "The Lion, The Witch, and The Wardrobe" - this is the treat that seduces Edmund into selling out his Brother and Sisters to the Witch.']
[{'roberta_neg': 0.00229513, 'roberta_neu': 0.090219304, 'roberta_pos': 0.9074856}, 'If you are looking for the secret ingredient in Robitussin I believe I have found it.  I got this in addition to the Root Beer Extract I ordered (which was good) and made some cherry soda.  The flavor is very medicinal.']


In [14]:
# print(dataset["Id"][0])

import colorama
from colorama import Fore

In [15]:
for id,value in final_score.items():
    score,text=value
    # print(score,text)
    # print(score["roberta_neg"],score["roberta_pos"])
    if score["roberta_neg"]>score["roberta_pos"]:
        print(Fore.RED + text[:100])
    else:
        print(Fore.GREEN + text[:100])
        


[32mI have bought several of the Vitality canned dog food products and have found them all to be of good
[31mProduct arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. No
[32mThis is a confection that has been around a few centuries.  It is a light, pillowy citrus gelatin wi
[32mIf you are looking for the secret ingredient in Robitussin I believe I have found it.  I got this in
[32mGreat taffy at a great price.  There was a wide assortment of yummy taffy.  Delivery was very quick.
[32mI got a wild hair for taffy and ordered this five pound bag. The taffy was all very enjoyable with m
[32mThis saltwater taffy had great flavors and was very soft and chewy.  Each candy was individually wra
[32mThis taffy is so good.  It is very soft and chewy.  The flavors are amazing.  I would definitely rec
[32mRight now I'm mostly just sprouting this so my cats can eat the grass. They love it. I rotate it aro
[32mThis is a very healthy dog food. Good for