### 1. Imports

In [32]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

### 2. Instantiating and setting up the model

In [2]:
# Load the tokenizer and the sequence-classification model from the same
# checkpoint; the id is kept in one place so the two cannot drift apart.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

Downloading: 100%|█████████████████████████████████████████████████████████████████████| 953/953 [00:00<00:00, 482kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████████| 872k/872k [00:00<00:00, 5.19MB/s]
Downloading: 100%|████████████████████████████████████████████████████████████████████| 112/112 [00:00<00:00, 55.9kB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████████| 669M/669M [00:29<00:00, 23.0MB/s]


### 3. Encode and Calculate Sentiment

In [10]:
# Encode one sample review into token ids, returned as a PyTorch tensor.
# Alternative (negative) sample to try instead:
#   'I hated this movie, not worth your money'
tokens = tokenizer.encode(
    'Such an amazing movie, so goood!!',
    return_tensors='pt',
)

In [11]:
tokens

tensor([[  101, 11165, 10144, 39854, 13113,   117, 10297, 11335, 40454,   106,
           106,   102]])

In [12]:
tokenizer.decode(tokens[0])

'[CLS] such an amazing movie, so goood!! [SEP]'

In [13]:
# Run the classifier on the encoded sample; the returned object exposes the
# raw per-class scores as `.logits` (used by the next cells).
result = model(tokens)

In [14]:
result

SequenceClassifierOutput(loss=None, logits=tensor([[-1.8765, -2.2570, -0.9621,  0.9105,  3.4774]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [15]:
# Position of the largest logit, i.e. the predicted class index (0-based).
# With no `dim` argument argmax works on the flattened tensor, so this equals
# the class index only because the batch holds a single sample.
torch.argmax(result.logits)

tensor(4)

### 4. Collect reviews by scraping the web

In [24]:
# Download the business page. A timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() stops the notebook on a 4xx/5xx
# response instead of parsing an error page into an empty review list.
r = requests.get('https://www.yelp.com/biz/honest-lowell?osq=Honest', timeout=10)
r.raise_for_status()

soup = BeautifulSoup(r.text, 'html.parser')

# Keep the text of every <p> tag whose class attribute matches "comment".
# NOTE(review): this relies on the page's current markup — confirm the class
# name still identifies review bodies if `reviews` comes back empty.
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class': regex})
reviews = [tag.text for tag in results]

In [30]:
reviews[2]

"Nice restaurant to eat dosa, sandwiches, dahipuri. Rest is ok food.Punjabi food is not good I don't like, paratha is so hard and burned. Falooda is tasty but over priced."

### 5. Load the reviews into a DataFrame and score those reviews with the transformer model

In [33]:
# One row per scraped review, held in a single 'review' column.
df = pd.DataFrame({'review': np.array(reviews)})

In [35]:
df['review'].iloc[2]

"Nice restaurant to eat dosa, sandwiches, dahipuri. Rest is ok food.Punjabi food is not good I don't like, paratha is so hard and burned. Falooda is tasty but over priced."

In [40]:
df.head()

Unnamed: 0,review
0,Getting a 5-star rating from this Yelp-elite i...
1,We have tried a bunch of their dishes over the...
2,"Nice restaurant to eat dosa, sandwiches, dahip..."
3,"Excellent taste, wait time is too much after 6..."
4,Been to this place multiple times and loved th...


In [41]:
len(df)

11

In [37]:
def sentiment_score(review, max_length=512):
    """Return the predicted sentiment of ``review`` as a 1-based class score.

    The text is encoded with the notebook-level ``tokenizer``, classified by
    the notebook-level ``model``, and the index of the largest logit is
    shifted by one so that the lowest class maps to 1.

    Args:
        review: Text to score.
        max_length: Upper bound on the number of tokens passed to the model;
            longer inputs are truncated by the tokenizer. The default mirrors
            the 512 cap the scoring cell applies to characters.
            NOTE(review): confirm it matches the checkpoint's maximum
            sequence length.

    Returns:
        int: 1-based index of the highest-scoring class.
    """
    # Truncate at the token level: slicing the raw string by characters does
    # not bound the tokenized length (special tokens are added on top).
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [42]:
sentiment_score(df['review'].iloc[2])

3

In [43]:
# Score every review; each text is cut to its first 512 characters before
# being handed to sentiment_score.
# NOTE(review): this is a character cap, not a token cap — confirm it is the
# intended guard for the model's input size.
df['sentiment'] = [sentiment_score(text[:512]) for text in df['review']]

In [44]:
df

Unnamed: 0,review,sentiment
0,Getting a 5-star rating from this Yelp-elite i...,4
1,We have tried a bunch of their dishes over the...,2
2,"Nice restaurant to eat dosa, sandwiches, dahip...",3
3,"Excellent taste, wait time is too much after 6...",4
4,Been to this place multiple times and loved th...,4
5,This place is a Indian food heaven. I have bee...,5
6,(1) Chinese : you only get Best Indo-Chinese a...,2
7,I will absolutely be going again ! The food wa...,5
8,I rarely leave reviews and I've been to so man...,5
9,"Honest? Okay, let me be honest with you! Very ...",1


In [45]:
df['review'].iloc[10]

'On 9/12/2022 evening, we visited honest lowell restaurant and ordered veg samosa, dry veg Manchurian, veg tripple schezwan rice, masala papad and Ganga Jamuna juice. The food was tasty but very oily. Next day I woke up with a swollen face and constipated stomach for which I had to book a doctors appointment. I will never visit this restaurant again.'