In [1]:
import numpy as np
import pandas as pd

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Single checkpoint identifier shared by both from_pretrained calls, so the
# tokenizer and the classifier are always loaded from the same place.
MODEL_NAME = "anchit48/fine-tuned-sentiment-analysis-customer-feedback"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [3]:
import torch

# Example review text. make_prediction below declares its own `text`
# parameter, so the function does not read this global.
text = "Mirissa Beach is truly a gem on Sri Lanka’s southern coast! The soft, golden sand and crystal-clear waters are perfect for lounging or taking a dip"

def make_prediction(text):
    """Classify ``text`` with the module-level ``tokenizer`` and ``model``.

    The text is tokenized into PyTorch tensors (with the tokenizer's
    truncation and padding enabled), passed through the model with gradient
    tracking disabled, and the index of the largest logit is returned as a
    plain Python int.
    """
    encoded = tokenizer(text, truncation=True, padding=True, return_tensors='pt')

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        scores = model(**encoded).logits

    # .item() expects a single prediction, i.e. one input text per call.
    return scores.argmax(dim=1).item()

In [None]:
# Smoke test: run the classifier on one clearly negative sentence and show
# the predicted class id as the cell output.
negetive = "this very bad place I dont want to be here"
make_prediction(negetive)

In [None]:
# Load the evaluation set (later cells read its `Sentiment` and `Review_Text`
# columns) and preview the first rows.
df_eval = pd.read_csv("../Data/Place_Reviews_Evaluation.csv")
df_eval.head()

In [6]:
def convert_sentiment(data):
    """Map a sentiment label to its integer class id.

    Args:
        data: Label value. Strings are compared case-insensitively and with
            surrounding whitespace ignored, so ``'Positive'`` or
            ``' negative '`` are accepted alongside the exact lowercase
            spellings.

    Returns:
        ``1`` for positive, ``0`` for negative, and ``None`` for anything
        else (unknown labels and non-string values such as NaN).
    """
    if not isinstance(data, str):
        return None
    # dict.get yields None for unrecognised labels, mirroring the implicit
    # fall-through of an if/elif chain without an else branch.
    return {'positive': 1, 'negative': 0}.get(data.strip().lower())


In [7]:
# Encode the text labels as integers so they can be compared with the
# integer class ids returned by make_prediction.
df_eval['Sentiment_encoded'] = df_eval['Sentiment'].map(convert_sentiment)

In [8]:
# Classify every evaluation review (one model call per row).
df_eval['Sentiment Predicted'] = df_eval['Review_Text'].map(make_prediction)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# The heatmap tick labels are hard-coded for the two classes 0 (negative) and
# 1 (positive). Pinning the label set keeps the matrix 2x2 and in that order,
# so rows/columns always line up with the tick labels even if one class never
# occurs in the evaluation data.
class_ids = [0, 1]
class_names = ['Negative', 'Positive']

y_true = df_eval['Sentiment_encoded']
y_pred = df_eval['Sentiment Predicted']

conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
accuracy = accuracy_score(y_true, y_pred)

# Explicit figure/axes so every call below targets this plot.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title(f'Confusion Matrix (Accuracy: {accuracy:.2f})')
plt.show()

In [None]:
# Preview the evaluation frame, now including the encoded labels and the
# predicted classes.
df_eval.head()

In [12]:
# Load the places dataset; later cells read its `latest_reviews` column and
# several descriptive text columns.
df = pd.read_csv("../Data/Places Dataset Enriched Filled.csv")

In [13]:
import re

# Flat module-level accumulator: extract_list_items appends every predicted
# class to this list on each call, so it grows across calls.
classified_reviews_array = []

def remove_non_ascii(s):
    """Return ``s`` with every non-ASCII character dropped."""
    ascii_bytes = s.encode('ascii', errors='ignore')
    return ascii_bytes.decode('ascii')

def extract_list_items(string):
    """Classify every review contained in a stringified list of reviews.

    Args:
        string: Text form of a Python list of review strings, for example
            ``['Great beach', "Don't go"]``. A value that is not a string
            (such as NaN for a missing cell) produces an empty result.

    Returns:
        list: One ``make_prediction`` result per review, in order.

    Side effects:
        Appends each prediction to the module-level
        ``classified_reviews_array`` and prints the per-call result list.
    """
    import ast  # local import: only this parser needs it

    if not isinstance(string, str):
        return []

    string = remove_non_ascii(string)

    # Python's repr switches to double quotes (or backslash escapes) when a
    # review contains an apostrophe, so a bare '...' regex cuts such reviews
    # in the wrong places. Parse the text as a list literal first and keep the
    # regex only as a fallback for text that is not a valid literal.
    try:
        parsed = ast.literal_eval(string)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        # Escape sequences decoded by literal_eval may reintroduce non-ASCII
        # characters, so strip each parsed review again.
        reviews = [remove_non_ascii(item) for item in parsed if isinstance(item, str)]
    else:
        reviews = re.findall(r"'(.*?)'", string)

    review_sub_array = [make_prediction(review) for review in reviews]
    classified_reviews_array.extend(review_sub_array)

    print(review_sub_array)
    return review_sub_array

In [None]:
# Spot-check the parser/classifier on the row labelled 4. Note that this call
# also appends its predictions to classified_reviews_array.
x = extract_list_items(df.loc[4, "latest_reviews"])

In [None]:
# Show the raw `latest_reviews` text of the row used for the spot check.
df.loc[4, "latest_reviews"]

In [None]:
# extract_list_items appends to the module-level classified_reviews_array on
# every call. Start from an empty accumulator so that the earlier single-row
# spot check, or a re-run of this cell, is not counted twice when the
# accumulated predictions are tallied later.
classified_reviews_array.clear()
df["classified_reviews"] = df["latest_reviews"].apply(extract_list_items)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
import pandas as pd

# Tally every prediction accumulated in classified_reviews_array by class id.
data = classified_reviews_array
counts = Counter(data)

df_2 = pd.DataFrame({
    'Class': ['Negative (0)', 'Positive (1)'],
    'Count': [counts[0], counts[1]]
})

# set_theme is the current name of the function; sns.set is its legacy alias.
sns.set_theme(style="whitegrid")
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x='Class', y='Count', data=df_2, hue='Class', palette=['orange', 'green'],
            dodge=False, legend=False, ax=ax)

ax.set_xlabel('Class', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
# Only the two classes above are plotted, so the title must not advertise a
# third ("Corrupt") category.
ax.set_title('Count of Negative and Positive Texts', fontsize=14)

plt.show()


In [18]:
def negetive_rate(array):
    """Return the share of ``0`` entries in ``array``, rounded to 2 decimals.

    An empty ``array`` has no defined rate and yields the sentinel ``-1``.
    """
    zeros = array.count(0)
    total = len(array)
    if total <= 0:
        return -1
    return round(zeros / total, 2)

In [19]:
# Per-row share of negative predictions; negetive_rate returns -1 for rows
# with an empty prediction list.
df["negetive_rate"] = df["classified_reviews"].map(negetive_rate)

In [None]:
# Preview the frame with the `classified_reviews` and `negetive_rate` columns.
df.head()

In [21]:
def combine_columns(df, columns=None):
    """Join several descriptive columns into one ``combined_info`` text column.

    Args:
        df: DataFrame containing the columns to merge. It is modified in
            place: a ``combined_info`` column is added or overwritten.
        columns: Optional sequence of column names to merge, in order.
            Defaults to the nine descriptive columns used by this notebook.

    Returns:
        The same ``df`` object, so the result can be reassigned by the caller.
    """
    if columns is None:
        columns = ['location_info', 'Geographical', 'Historical',
                   'Religious', 'Natural', 'Entertainment', 'Accommodation',
                   'Shopping', 'Food']

    # Missing cells become '' and every remaining value is cast to str, so a
    # numeric (or otherwise non-string) cell cannot make ' '.join raise.
    text_parts = df[list(columns)].fillna('').astype(str)
    df['combined_info'] = text_parts.apply(' '.join, axis=1)

    return df
    
# Add the `combined_info` column; combine_columns returns the frame it was given.
df = combine_columns(df)

In [None]:
import nltk
from nltk.corpus import stopwords
import string

# Fetch the NLTK 'stopwords' resource that remove_stop_words reads through
# stopwords.words('english').
nltk.download('stopwords')

def remove_stop_words(df, text_column):
    """Lower-case, de-punctuate and drop English stop words from a text column.

    Args:
        df: DataFrame holding ``text_column``; that column is overwritten in
            place with the cleaned text.
        text_column: Name of the column to clean. Missing values are treated
            as empty strings.

    Returns:
        The same ``df`` object.
    """
    stop_words = set(stopwords.words('english'))
    # Built once here instead of once per row.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    def clean_text(text):
        kept = []
        for token in text.lower().split():
            bare = token.translate(strip_punctuation)
            # Tokens made only of punctuation vanish; plain stop words go too.
            if not bare or bare in stop_words:
                continue
            # A stop-list entry that contains an apostrophe (e.g. "don't") can
            # never equal a fully de-punctuated token ("dont"), so also test
            # the token with only its surrounding punctuation trimmed.
            if token.strip(string.punctuation) in stop_words:
                continue
            kept.append(bare)
        return ' '.join(kept)

    df[text_column] = df[text_column].fillna('').apply(clean_text)

    return df

In [None]:
# Clean the `combined_info` text in place (lower-case, no punctuation, no
# English stop words).
df = remove_stop_words(df, 'combined_info')

In [27]:
# Write the final frame, without the index column, to a CSV in the current
# working directory.
df.to_csv('Places Dataset Classified Reviews Cleaned Combined.csv', index=False)