In [15]:
from src.data import make_dataset
from transformers import pipeline
from sklearn.metrics import classification_report
import torch
import pandas as pd
import gc

In [16]:
# Device index handed to every `pipeline(...)` call below:
# 0 when CUDA is available, -1 otherwise.
device = 0 if torch.cuda.is_available() else -1

In [17]:
# Disabled: call to make_dataset.main on the raw reviews file with the two
# processed output paths below. The next cell reads those same two processed
# CSVs directly instead.
# NOTE(review): presumably this regenerates the train/test split — confirm in
# src.data.make_dataset before re-enabling.
# input_file = "../../data/raw/reviews.csv"
# train_output_file = "../../data/processed/train_final_processed_reviews.csv"
# test_output_file = "../../data/processed/test_final_processed_reviews.csv"
# X_train, X_test, y_train, y_test = make_dataset.main(input_file, train_output_file, test_output_file)


In [18]:
# Read the processed train/test review tables; the saved 'Unnamed: 0' column
# becomes the DataFrame index.
train = pd.read_csv(
    "../../data/processed/train_final_processed_reviews.csv",
    index_col='Unnamed: 0',
)
test = pd.read_csv(
    "../../data/processed/test_final_processed_reviews.csv",
    index_col='Unnamed: 0',
)

# Split each table into feature columns (X_*) and the 'sentiment' target (y_*).
X_train, y_train = train.drop(columns='sentiment'), train['sentiment']
X_test, y_test = test.drop(columns='sentiment'), test['sentiment']

In [19]:
# Preview the first rows of the test feature table.
X_test.head()

Unnamed: 0,time,text,cleaned_text,lowercase_count,uppercase_count,uppercase_words,uppercase_ratio,punc_count,num_digits,num_verbs,num_nouns,num_tokens_cleaned,num_tokens_raw,num_words_misspelled,polarity,subjectivity,num_pos_words,num_neg_words
940,8/6/21,"This tea is amazing, I cannot believe the pric...",tea amazing believe price price pay less half ...,38,1,FAST,0.02381,6,0,0,9,17,42,0,0.9468,0.583333,4,0
3920,4/9/21,I've tried a handful of other brands of French...,try handful brand french roast find one tully ...,41,0,,0.0,4,2,2,6,17,43,2,0.6249,0.5875,1,0
5038,9/12/20,I would recommend not buying these nuts if you...,would recommend buying nut look really good ta...,70,0,,0.0,7,0,5,13,32,72,0,0.8399,0.563719,0,0
3432,21/4/21,"I wish these came in larger quantities, if I c...",wish come large quantity could cuppa everyday ...,20,0,,0.0,3,0,3,2,10,23,0,0.4019,0.514286,0,0
2376,13/1/20,Has a smooth taste like it claims without any ...,smooth taste like claim without unpleasant met...,68,0,,0.0,5,0,2,13,36,71,3,0.9408,0.605,0,0


In [20]:
# Preview the first ground-truth sentiment labels of the test split.
y_test.head()

940     positive
3920    positive
5038    negative
3432    positive
2376    positive
Name: sentiment, dtype: object

In [21]:
# Raw review texts of the test split, each cut to at most 512 characters.
# Slicing a shorter string returns it unchanged, so no length check is needed.
data = [review[:512] for review in X_test.text.to_list()]

In [22]:
%%time
# Model 0: distilbert-base-uncased-finetuned-sst-2-english (POSITIVE/NEGATIVE labels).
# The task is named explicitly, as in the siebert cell, instead of being
# inferred from the model's Hub metadata.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=device,
)
# `data` is only capped at 512 *characters*, which does not bound the number of
# *tokens* (special tokens are added, and symbol-heavy text can tokenize densely).
# truncation=True lets the tokenizer cut at the model's maximum sequence length,
# so a long input cannot crash inference.
results0 = sentiment_pipeline(data, truncation=True)
results0[0:5]

CPU times: total: 5.89 s
Wall time: 6.7 s


[{'label': 'POSITIVE', 'score': 0.9998703002929688},
 {'label': 'POSITIVE', 'score': 0.9991003274917603},
 {'label': 'NEGATIVE', 'score': 0.9987446069717407},
 {'label': 'POSITIVE', 'score': 0.9997642636299133},
 {'label': 'POSITIVE', 'score': 0.9972267746925354}]

In [23]:
%%time
# Model 1: nlptown/bert-base-multilingual-uncased-sentiment (star-rating labels
# such as '5 stars'). The task is named explicitly, as in the siebert cell,
# instead of being inferred from the model's Hub metadata.
specific_model = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
    device=device,
)
# `data` is only capped at 512 *characters*, which does not bound the number of
# *tokens*. truncation=True lets the tokenizer cut at the model's maximum
# sequence length, so a long input cannot crash inference.
results1 = specific_model(data, truncation=True)
results1[0:5]

CPU times: total: 10.1 s
Wall time: 10.9 s


[{'label': '5 stars', 'score': 0.9336758852005005},
 {'label': '5 stars', 'score': 0.4628945291042328},
 {'label': '2 stars', 'score': 0.555162250995636},
 {'label': '4 stars', 'score': 0.5062201619148254},
 {'label': '4 stars', 'score': 0.6108953952789307}]

In [24]:
%%time
# Model 2: Seethal/sentiment_analysis_generic_dataset (generic LABEL_<n> outputs).
# The task is named explicitly, as in the siebert cell, instead of being
# inferred from the model's Hub metadata.
specific_model = pipeline(
    "sentiment-analysis",
    model="Seethal/sentiment_analysis_generic_dataset",
    device=device,
)
# `data` is only capped at 512 *characters*, which does not bound the number of
# *tokens*. truncation=True lets the tokenizer cut at the model's maximum
# sequence length, so a long input cannot crash inference.
results2 = specific_model(data, truncation=True)
results2[0:5]

CPU times: total: 6.02 s
Wall time: 6.82 s


[{'label': 'LABEL_2', 'score': 0.9964131712913513},
 {'label': 'LABEL_2', 'score': 0.9589166045188904},
 {'label': 'LABEL_0', 'score': 0.7314499020576477},
 {'label': 'LABEL_2', 'score': 0.9967833757400513},
 {'label': 'LABEL_2', 'score': 0.9085060358047485}]

In [25]:
%%time
# Model 3: siebert/sentiment-roberta-large-english (POSITIVE/NEGATIVE labels).
specific_model = pipeline(
    'sentiment-analysis',
    model="siebert/sentiment-roberta-large-english",
    device=device,
)
# `data` is only capped at 512 *characters*, which does not bound the number of
# *tokens*. truncation=True lets the tokenizer cut at the model's maximum
# sequence length, so a long input cannot crash inference.
results3 = specific_model(data, truncation=True)
results3[0:5]

CPU times: total: 18.9 s
Wall time: 18.2 s


[{'label': 'POSITIVE', 'score': 0.9989363551139832},
 {'label': 'POSITIVE', 'score': 0.9988914132118225},
 {'label': 'NEGATIVE', 'score': 0.9995133876800537},
 {'label': 'POSITIVE', 'score': 0.9988318085670471},
 {'label': 'POSITIVE', 'score': 0.9908561706542969}]

# Test Results

In [26]:
# Binary ground truth: 1 for 'positive', 0 for every other sentiment value.
y_true = [int(label == 'positive') for label in y_test]

## Results for distilbert-base-uncased-finetuned-sst-2-english

In [27]:
# Convert the pipeline's label strings to the 0/1 encoding of y_true
# ('POSITIVE' -> 1, anything else -> 0) and print the per-class metrics.
labels = [prediction['label'] for prediction in results0]
y_pred = [int(name == 'POSITIVE') for name in labels]
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.66      0.90      0.76       283
           1       0.96      0.84      0.90       806

    accuracy                           0.86      1089
   macro avg       0.81      0.87      0.83      1089
weighted avg       0.88      0.86      0.86      1089



## Results for nlptown/bert-base-multilingual-uncased-sentiment

In [28]:
# Star-rating labels are binarised: '3 stars', '4 stars' and '5 stars' count as
# positive (1); every other label counts as negative (0).
labels = [prediction['label'] for prediction in results1]
y_pred = [int(name in {'5 stars', '4 stars', '3 stars'}) for name in labels]
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.84      0.84      0.84       283
           1       0.94      0.94      0.94       806

    accuracy                           0.92      1089
   macro avg       0.89      0.89      0.89      1089
weighted avg       0.92      0.92      0.92      1089



## Results for Seethal/sentiment_analysis_generic_dataset

In [29]:
# Only 'LABEL_2' is scored as positive (1); 'LABEL_0' and any other label become 0.
# NOTE(review): presumably LABEL_0/1/2 mean negative/neutral/positive — confirm
# against the model card.
labels = [prediction['label'] for prediction in results2]
y_pred = [int(name == 'LABEL_2') for name in labels]
# Alternative mapping that scores everything except 'LABEL_0' as positive:
# y_pred = [0 if label=='LABEL_0' else 1 for label in labels]
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.72      0.93      0.81       283
           1       0.97      0.87      0.92       806

    accuracy                           0.89      1089
   macro avg       0.84      0.90      0.86      1089
weighted avg       0.91      0.89      0.89      1089



## Results for siebert/sentiment-roberta-large-english

In [30]:
# Convert the pipeline's label strings to the 0/1 encoding of y_true
# ('POSITIVE' -> 1, anything else -> 0) and print the per-class metrics.
labels = [prediction['label'] for prediction in results3]
y_pred = [int(name == 'POSITIVE') for name in labels]
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.89      0.92      0.91       283
           1       0.97      0.96      0.97       806

    accuracy                           0.95      1089
   macro avg       0.93      0.94      0.94      1089
weighted avg       0.95      0.95      0.95      1089



In [31]:
# Drop the notebook's references to both pipeline objects, then run the garbage
# collector and empty PyTorch's CUDA cache.
sentiment_pipeline = specific_model = None
gc.collect()
torch.cuda.empty_cache()

In [32]:
# Evaluate the project's Seibert wrapper class on the same test split.
# NOTE(review): presumably this wraps siebert/sentiment-roberta-large-english —
# the report it prints is identical to the one printed for that pipeline;
# confirm in src.models.sentiment_analysis.pre_trained.seibert.
from src.models.sentiment_analysis.pre_trained.seibert import Seibert

model = Seibert()
# The whole feature DataFrame is passed, not just the text column.
# NOTE(review): assumes predict() selects the column(s) it needs and returns
# labels in the same 0/1 encoding as y_true — verify against the class.
pred = model.predict(X_test)
print(classification_report(y_true, pred))

              precision    recall  f1-score   support

           0       0.89      0.92      0.91       283
           1       0.97      0.96      0.97       806

    accuracy                           0.95      1089
   macro avg       0.93      0.94      0.94      1089
weighted avg       0.95      0.95      0.95      1089



In [33]:
# Disabled: the same predict-and-report evaluation for the project's
# BertFineTuned wrapper, constructed with 'bert_state_dict.pt'.
# NOTE(review): this has no output in the saved notebook; either re-enable it
# once the state dict is available or remove the cell.
# from src.models.sentiment_analysis.pre_trained.bert_fine_tuned import BertFineTuned
# model = BertFineTuned('bert_state_dict.pt')
# pred = model.predict(X_test)
# print(classification_report(y_true, pred))