## VADER Model

In [6]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import re

# Download the VADER lexicon through nltk's downloader before the analyzer
# is constructed (the captured cell output shows the download is skipped
# when the package is already up-to-date).
nltk.download('vader_lexicon')

# Create one shared analyzer; analyze_sentiment() below reads this
# module-level name on every call.
sia = SentimentIntensityAnalyzer()

# Load the review dataset. The path is relative to the current working
# directory. Later statements read the 'Review' and 'Rating' columns.
file_path = '../review_data/dataset_7.csv'
df = pd.read_csv(file_path)

# Function to clean and normalize review text
def clean_review(review):
    """Normalize a raw review into lowercase, letters-only text.

    The value is stringified, lowercased, stripped of every character that
    is neither an ASCII letter nor whitespace, and has runs of whitespace
    collapsed to single spaces with the ends trimmed.

    Returns:
        The cleaned string, or None when the review is missing (NaN/None)
        or when nothing is left after cleaning.
    """
    if pd.isna(review):
        return None

    lowered = str(review).lower()
    letters_only = re.sub(r'[^a-zA-Z\s]', '', lowered)
    collapsed = re.sub(r'\s+', ' ', letters_only).strip()

    # An empty result carries no text to score, so report it as missing.
    return collapsed or None

# Clean the 'Review' column in place; clean_review returns None for
# missing or empty-after-cleaning reviews.
df['Review'] = df['Review'].apply(clean_review)

# Drop rows where either 'Review' or 'Rating' is missing. This mutates df
# in place rather than creating a new frame.
df.dropna(subset=['Review', 'Rating'], inplace=True)

def analyze_sentiment(review):
    """Return the polarity scores that the module-level analyzer ``sia``
    computes for ``review``.

    The result is whatever ``sia.polarity_scores`` returns; set_sentiment()
    later reads its 'compound' entry.
    """
    return sia.polarity_scores(review)

# Apply the sentiment analysis function to the 'Review' column, storing
# each row's score result in a new 'Score' column.
# NOTE(review): 'Review' has already been lowercased and stripped of
# punctuation by clean_review at this point. VADER's documentation describes
# capitalization and punctuation as intensity cues, so scoring the raw text
# may be preferable — confirm before changing.
df['Score'] = df['Review'].apply(analyze_sentiment)

def set_sentiment(score, threshold=0.5):
    """Map a score mapping to a sentiment label using its compound value.

    Args:
        score: Mapping with a 'compound' entry (as produced by
            analyze_sentiment in this file).
        threshold: Magnitude the compound score must reach to be labelled
            positive (>= threshold) or negative (<= -threshold). Defaults
            to 0.5, the cut-off that was previously hard-coded here.

    Returns:
        'positive', 'negative', or 'neutral'.

    Raises:
        KeyError: If ``score`` has no 'compound' entry.
    """
    # NOTE(review): the VADER authors' README suggests +/-0.05 as the usual
    # cut-off; 0.5 is kept as the default to preserve existing results —
    # confirm which is intended.
    compound = score['compound']
    if compound >= threshold:
        return 'positive'
    if compound <= -threshold:
        return 'negative'
    return 'neutral'

# Turn each row's score into a 'positive' / 'negative' / 'neutral' label.
df['Sentiment'] = df['Score'].apply(set_sentiment)

# Save the labelled reviews (without the index column) to the analyzed-data
# directory. An earlier assignment of '../review_data/dataset_7(senti).csv'
# to output_file_path was overwritten before use and has been removed as a
# dead store.
output_file_path = '../analyzed_data/vader_analyzed_data.csv'
df.to_csv(output_file_path, index=False)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\KIIT\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


## TextBlob

In [None]:
import pandas as pd
from textblob import TextBlob

# Read the CSV file; its 'Review' column is labelled further down.
# This rebinds file_path and df, replacing the values from the VADER cell.
# NOTE(review): unlike the VADER cell, this path has no '../' prefix, so the
# two cells appear to expect different working directories — confirm.
file_path = 'review_data/transulated_all_products_data.csv'
df = pd.read_csv(file_path)

# Function to determine sentiment using TextBlob
def get_sentiment(review):
    """Label a review as positive, negative, or neutral via TextBlob polarity.

    Args:
        review: Review text. Non-string values are converted with ``str``.

    Returns:
        'positive' when polarity > 0, 'negative' when polarity < 0,
        'neutral' when polarity is exactly 0, or None when the review is
        missing (None/NaN) and therefore has no text to score.
    """
    # Empty CSV cells arrive from read_csv as NaN; passing one to TextBlob
    # raises and would abort the whole column apply, so skip them here.
    if pd.isna(review):
        return None

    polarity = TextBlob(str(review)).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    return 'neutral'

# Label every review by applying get_sentiment to the 'Review' column and
# store the result in a new 'Sentiment' column.
df['Sentiment'] = df['Review'].apply(get_sentiment)

# Save the dataframe, now including the 'Sentiment' column, without the
# index. The path is relative to the current working directory.
df.to_csv('analyzed_data/textblob_analyzed_data.csv', index=False)

# Print the first few rows of the updated dataframe as a quick sanity check.
print(df.head())
