In [1]:
import numpy as np
import pandas as pd
import time
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import RobertaTokenizer
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, RobertaForSequenceClassification, TextClassificationPipeline
import torch
from sklearn.metrics import classification_report

# Load Data and Data Preprocessing

In [2]:
%%time
# Read only the columns this notebook uses.
columns_to_load = ['app_name', 'language', 'review', 'recommended']
df = pd.read_csv('steam_reviews.csv', usecols=columns_to_load)

# Keep English reviews that actually have review text.
eng_df_og = df[df['language'] == 'english']
eng_df_og = eng_df_og.dropna(subset=['review'])

# Work on a 1% sample; the fixed random_state makes the draw repeatable.
# reset_index returns a new frame, so nothing is mutated in place.
eng_df = eng_df_og.sample(frac=0.01, random_state=42).reset_index(drop=True)

# Encode the True/False `recommended` values as 1/0.
eng_df['recommended'] = eng_df['recommended'].map({True: 1, False: 0})
def preprocess_review(review):
    """Return ``review`` converted to lowercase."""
    lowered = review.lower()
    return lowered

# Replace the raw `review` column with its preprocessed counterpart.
eng_df = (
    eng_df
    .assign(processed_review=eng_df['review'].apply(preprocess_review))
    .drop(columns=['review'])
)

# Hold out 10% of the sample for validation; the seed keeps the split repeatable.
train_df, valid_df = train_test_split(eng_df, test_size=0.1, random_state=1)

CPU times: total: 1min 16s
Wall time: 1min 16s


In [3]:
def my_evaluate_sentiment(texts, pipeline, batch_size=1024):
    """Run ``pipeline`` over ``texts`` in slices and gather the predictions.

    Parameters
    ----------
    texts : sequence
        Inputs to classify; must support ``len`` and slicing.
    pipeline : callable
        Called once per slice. Each call must return one mapping per input
        with a ``'label'`` and a ``'score'`` entry.
    batch_size : int, default 1024
        Number of texts handed to ``pipeline`` per call.

    Returns
    -------
    tuple of (list, list)
        The predicted labels and their scores, in input order.
    """
    labels, scores = [], []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        for prediction in pipeline(chunk):
            labels.append(prediction['label'])
            scores.append(prediction['score'])
    return labels, scores

## Base RoBERTa

In [4]:
%%time
# Baseline: the stock 'roberta-base' checkpoint loaded as a sequence classifier.
# The load warning printed under this cell reports the classifier weights as
# newly initialized, so these predictions come from an untrained head.
model_base_roberta = RobertaForSequenceClassification.from_pretrained('roberta-base')
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
device = 0 if torch.cuda.is_available() else -1  # Use GPU (device 0) if available, else use CPU (-1)
base_roberta_sentiment_pipeline = TextClassificationPipeline(model=model_base_roberta, tokenizer=tokenizer, framework='pt', device=device, truncation=True)
# Predictions are stored on a copy so valid_df itself gains no extra columns.
base_roberta_df = valid_df.copy()

# my_evaluate_sentiment returns (labels, scores); only the labels are kept here.
result_base = my_evaluate_sentiment(base_roberta_df['processed_review'].tolist(),pipeline=base_roberta_sentiment_pipeline)
base_roberta_df['base_roberta_sentiment'] = result_base[0]
# Convert the 'LABEL_0'/'LABEL_1' strings to 0/1 so they can be compared with `recommended`.
base_roberta_df['base_roberta_sentiment'] = base_roberta_df['base_roberta_sentiment'].map({'LABEL_1': 1, 'LABEL_0': 0})
# Report per-class metrics: true labels first, predictions second.
print(classification_report(base_roberta_df['recommended'],base_roberta_df['base_roberta_sentiment']))

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1031
           1       0.89      1.00      0.94      8588

    accuracy                           0.89      9619
   macro avg       0.45      0.50      0.47      9619
weighted avg       0.80      0.89      0.84      9619

CPU times: total: 1min 48s
Wall time: 1min 46s


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Keep the validation rows where the baseline prediction differs from the
# true `recommended` label, then export them to Excel.
# NOTE(review): the analysis cell later in the notebook reads this file back
# and sums a 'sarcasm' column that this export does not create — presumably
# it is added by hand in the spreadsheet; confirm.
filtered_df_base = base_roberta_df.loc[
    base_roberta_df['recommended'].ne(base_roberta_df['base_roberta_sentiment'])
]
filtered_df_base.to_excel('base_roberta_mismatched_data.xlsx', index=False)


## Own Fine-Tuned Model: 3 Epochs (EPO3)

In [5]:
%%time
# Evaluate my own fine-tuned checkpoint on the validation split.

# Folder holding the fine-tuned weights (presumably the 3-epoch run, going by
# the folder name — confirm). Update this path if the checkpoint lives elsewhere.
model_path_epo3 = './Fine_tuned_epo3'  # Update model path
model_epo3 = RobertaForSequenceClassification.from_pretrained(model_path_epo3)
# NOTE(review): the tokenizer is loaded from 'roberta-base', not from the
# checkpoint folder — assumes fine-tuning left the tokenizer unchanged; confirm.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# 0 is passed as the device when CUDA is available, otherwise -1.
device = 0 if torch.cuda.is_available() else -1  
my_finetune_sentiment_pipeline_epo3 = TextClassificationPipeline(model=model_epo3, tokenizer=tokenizer, framework='pt', device=device, truncation=True)

# Predictions are stored on a copy so valid_df itself gains no extra columns.
my_finetunedf_epo3 = valid_df.copy()
# my_evaluate_sentiment returns (labels, scores); both are kept as new columns.
results = my_evaluate_sentiment(my_finetunedf_epo3['processed_review'].tolist(),pipeline=my_finetune_sentiment_pipeline_epo3)
my_finetunedf_epo3['my_finetune_roberta_sentiment'], my_finetunedf_epo3['my_finetune_roberta_score'] = results[0], results[1]
# Convert the 'LABEL_0'/'LABEL_1' strings to 0/1 so they can be compared with `recommended`.
my_finetunedf_epo3['my_finetune_roberta_sentiment'] = my_finetunedf_epo3['my_finetune_roberta_sentiment'].map({'LABEL_1': 1, 'LABEL_0': 0})
# Report per-class metrics: true labels first, predictions second.
print(classification_report(my_finetunedf_epo3['recommended'],my_finetunedf_epo3['my_finetune_roberta_sentiment']))

              precision    recall  f1-score   support

           0       0.80      0.74      0.77      1031
           1       0.97      0.98      0.97      8588

    accuracy                           0.95      9619
   macro avg       0.89      0.86      0.87      9619
weighted avg       0.95      0.95      0.95      9619

CPU times: total: 1min 47s
Wall time: 1min 44s


## Own Fine-Tuned Model: 4 Epochs (EPO4)

In [6]:
%%time
# Folder holding the fine-tuned weights; the analysis cell at the end of the
# notebook refers to this model as the 4-epoch fine-tune.
model_path_epo4 = './Fine_tuned_epo4'  # Update model path
model_epo4 = RobertaForSequenceClassification.from_pretrained(model_path_epo4)
# NOTE(review): the tokenizer is loaded from 'roberta-base', not from the
# checkpoint folder — assumes fine-tuning left the tokenizer unchanged; confirm.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
device = 0 if torch.cuda.is_available() else -1  # Use GPU (device 0) if available, else use CPU (-1)
my_finetune_sentiment_pipeline_epo4 = TextClassificationPipeline(model=model_epo4, tokenizer=tokenizer, framework='pt', device=device, truncation=True)

# Predictions are stored on a copy so valid_df itself gains no extra columns.
my_finetunedf_epo4 = valid_df.copy()
# my_evaluate_sentiment returns (labels, scores); both are kept as new columns.
results = my_evaluate_sentiment(my_finetunedf_epo4['processed_review'].tolist(),pipeline=my_finetune_sentiment_pipeline_epo4)
my_finetunedf_epo4['my_finetune_roberta_sentiment'], my_finetunedf_epo4['my_finetune_roberta_score'] = results[0], results[1]
# Convert the 'LABEL_0'/'LABEL_1' strings to 0/1 so they can be compared with `recommended`.
my_finetunedf_epo4['my_finetune_roberta_sentiment'] = my_finetunedf_epo4['my_finetune_roberta_sentiment'].map({'LABEL_1': 1, 'LABEL_0': 0})

# Report per-class metrics: true labels first, predictions second.
print(classification_report(my_finetunedf_epo4['recommended'],my_finetunedf_epo4['my_finetune_roberta_sentiment']))

              precision    recall  f1-score   support

           0       0.78      0.77      0.77      1031
           1       0.97      0.97      0.97      8588

    accuracy                           0.95      9619
   macro avg       0.87      0.87      0.87      9619
weighted avg       0.95      0.95      0.95      9619

CPU times: total: 1min 49s
Wall time: 1min 46s


### Store as xlsx for manual analysis

In [7]:
# Keep the validation rows where the EPO4 prediction differs from the true
# `recommended` label, then export them to Excel.
# NOTE(review): the analysis cell later in the notebook reads this file back
# and sums a 'sarcasm' column that this export does not create — presumably
# it is added by hand in the spreadsheet; confirm.
filtered_df_epo4 = my_finetunedf_epo4.loc[
    my_finetunedf_epo4['recommended'].ne(my_finetunedf_epo4['my_finetune_roberta_sentiment'])
]
filtered_df_epo4.to_excel('False_classifications_epo4.xlsx', index=False)

## Own Fine-Tuned Model: 5 Epochs (EPO5)

In [8]:
%%time
# Folder holding the fine-tuned weights; the analysis cell at the end of the
# notebook refers to this model as the 5-epoch fine-tune.
model_path_epo5 = './Fine_tuned_epo5'  # Update model path
model_epo5 = RobertaForSequenceClassification.from_pretrained(model_path_epo5)
# NOTE(review): the tokenizer is loaded from 'roberta-base', not from the
# checkpoint folder — assumes fine-tuning left the tokenizer unchanged; confirm.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
device = 0 if torch.cuda.is_available() else -1  # Use GPU (device 0) if available, else use CPU (-1)
my_finetune_sentiment_pipeline_epo5 = TextClassificationPipeline(model=model_epo5, tokenizer=tokenizer, framework='pt', device=device, truncation=True)

# Predictions are stored on a copy so valid_df itself gains no extra columns.
my_finetunedf_epo5 = valid_df.copy()
# my_evaluate_sentiment returns (labels, scores); both are kept as new columns.
results_epo5 = my_evaluate_sentiment(my_finetunedf_epo5['processed_review'].tolist(),pipeline=my_finetune_sentiment_pipeline_epo5)
my_finetunedf_epo5['my_finetune_roberta_sentiment'], my_finetunedf_epo5['my_finetune_roberta_score'] = results_epo5[0], results_epo5[1]
# Convert the 'LABEL_0'/'LABEL_1' strings to 0/1 so they can be compared with `recommended`.
my_finetunedf_epo5['my_finetune_roberta_sentiment'] = my_finetunedf_epo5['my_finetune_roberta_sentiment'].map({'LABEL_1': 1, 'LABEL_0': 0})
# Report per-class metrics: true labels first, predictions second.
print(classification_report(my_finetunedf_epo5['recommended'],my_finetunedf_epo5['my_finetune_roberta_sentiment']))

              precision    recall  f1-score   support

           0       0.86      0.64      0.74      1031
           1       0.96      0.99      0.97      8588

    accuracy                           0.95      9619
   macro avg       0.91      0.81      0.85      9619
weighted avg       0.95      0.95      0.95      9619

CPU times: total: 1min 48s
Wall time: 1min 46s


### Store as xlsx for manual analysis

In [9]:

# Keep the validation rows where the EPO5 prediction differs from the true
# `recommended` label, then export them to Excel.
# NOTE(review): the variable is spelled `ep05` (digit zero), unlike
# `filtered_df_epo4`; the name is kept in case other cells reference it.
# NOTE(review): the analysis cell later in the notebook reads this file back
# and sums a 'sarcasm' column that this export does not create — presumably
# it is added by hand in the spreadsheet; confirm.
filtered_df_ep05 = my_finetunedf_epo5.loc[
    my_finetunedf_epo5['recommended'].ne(my_finetunedf_epo5['my_finetune_roberta_sentiment'])
]
filtered_df_ep05.to_excel('False_classifications_epo5.xlsx', index=False)

## Own Fine-Tuned Model: 10 Epochs (EPO10)

In [10]:
%%time

# Folder holding the fine-tuned weights (presumably the 10-epoch run, going by
# the folder name — confirm).
model_path_epo10 = './Fine_tuned_epo10'  # Update model path
model_epo10 = RobertaForSequenceClassification.from_pretrained(model_path_epo10)
# NOTE(review): the tokenizer is loaded from 'roberta-base', not from the
# checkpoint folder — assumes fine-tuning left the tokenizer unchanged; confirm.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
device = 0 if torch.cuda.is_available() else -1  # Use GPU (device 0) if available, else use CPU (-1)
my_finetune_sentiment_pipeline_epo10 = TextClassificationPipeline(model=model_epo10, tokenizer=tokenizer, framework='pt', device=device, truncation=True)

# Predictions are stored on a copy so valid_df itself gains no extra columns.
my_finetunedf_epo10 = valid_df.copy()
# my_evaluate_sentiment returns (labels, scores); both are kept as new columns.
results = my_evaluate_sentiment(my_finetunedf_epo10['processed_review'].tolist(),pipeline=my_finetune_sentiment_pipeline_epo10)
my_finetunedf_epo10['my_finetune_roberta_sentiment'], my_finetunedf_epo10['my_finetune_roberta_score'] = results[0], results[1]
# Convert the 'LABEL_0'/'LABEL_1' strings to 0/1 so they can be compared with `recommended`.
my_finetunedf_epo10['my_finetune_roberta_sentiment'] = my_finetunedf_epo10['my_finetune_roberta_sentiment'].map({'LABEL_1': 1, 'LABEL_0': 0})
# Report per-class metrics: true labels first, predictions second.
print(classification_report(my_finetunedf_epo10['recommended'],my_finetunedf_epo10['my_finetune_roberta_sentiment']))

              precision    recall  f1-score   support

           0       0.80      0.73      0.77      1031
           1       0.97      0.98      0.97      8588

    accuracy                           0.95      9619
   macro avg       0.89      0.86      0.87      9619
weighted avg       0.95      0.95      0.95      9619

CPU times: total: 1min 46s
Wall time: 1min 45s


## Analysis of misclassified reviews

In [14]:
import pandas as pd

# Read back the misclassification workbooks exported earlier in the notebook.
# Each one is expected to contain a 'sarcasm' column that can be summed.
Base_analysis = pd.read_excel('base_roberta_mismatched_data.xlsx')
EPO4_analysis = pd.read_excel('False_classifications_epo4.xlsx')
EPO5_analysis = pd.read_excel('False_classifications_epo5.xlsx')

# Total of the 'sarcasm' column per model.
num_sarcasm_base = Base_analysis['sarcasm'].sum()
num_sarcasm_epo4 = EPO4_analysis['sarcasm'].sum()
num_sarcasm_epo5 = EPO5_analysis['sarcasm'].sum()
print(f"The number of the 'sarcasm' for baseline model is: {num_sarcasm_base:}")
print(f"The number of the 'sarcasm' for Fine-Tuned Model with 4 epochs is: {num_sarcasm_epo4:}")
print(f"The number of the 'sarcasm' for Fine-Tuned Model with 5 epochs is {num_sarcasm_epo5:}")

# Same totals expressed as a share of each workbook's row count.
percentage_sarcasm_base = num_sarcasm_base / len(Base_analysis) * 100
percentage_sarcasm_epo4 = num_sarcasm_epo4 / len(EPO4_analysis) * 100
percentage_sarcasm_epo5 = num_sarcasm_epo5 / len(EPO5_analysis) * 100

print(f"The percentage of the 'sarcasm' for baseline model is: {percentage_sarcasm_base:.2f}%")
print(f"The percentage of the 'sarcasm' for Fine-Tuned Model with 4 epochs is: {percentage_sarcasm_epo4:.2f}%")
print(f"The percentage of the 'sarcasm' for Fine-Tuned Model with 5 epochs is {percentage_sarcasm_epo5:.2f}%")


The number of the 'sarcasm' for baseline model is: 33
The number of the 'sarcasm' for Fine-Tuned Model with 4 epochs is: 46
The number of the 'sarcasm' for Fine-Tuned Model with 5 epochs is 34
The percentage of the 'sarcasm' for baseline model is: 3.20%
The percentage of the 'sarcasm' for Fine-Tuned Model with 4 epochs is: 9.89%
The percentage of the 'sarcasm' for Fine-Tuned Model with 5 epochs is 7.19%
