In [None]:
# Install the third-party packages this notebook imports (%pip installs into the running kernel's environment).
# NOTE(review): versions are unpinned — consider pinning them so a re-run reproduces the same results.
%pip install pandas numpy matplotlib nltk

In [2]:
# Imports
from functools import lru_cache

import pandas as pd # type: ignore
import numpy as np # type: ignore
import matplotlib.pyplot as plt # type: ignore
import nltk # type: ignore
from nltk.sentiment.vader import SentimentIntensityAnalyzer # type: ignore
from nltk.corpus import stopwords # type: ignore
from nltk.tokenize import word_tokenize # type: ignore
from nltk.stem import WordNetLemmatizer # type: ignore

In [None]:
# Fetch the NLTK resources used later in the notebook, in a fixed order:
# VADER lexicon, stopword list, Punkt tokenizer data, and WordNet.
for nltk_resource in ('vader_lexicon', 'stopwords', 'punkt', 'wordnet', 'punkt_tab'):
    nltk.download(nltk_resource)

# Read the review CSV (reviews with scores 1-5, per the dataset description) into a DataFrame
file_path = 'amazon_reviews.csv'
df = pd.read_csv(file_path)

In [None]:
# Preprocess text data: tokenize text, remove stopwords, lemmatize tokens, join into string.
# This is a preprocessing pass over every review (no model is trained here); it is the
# slowest step of the notebook, so the stopword set and lemmatizer are loaded only once.
@lru_cache(maxsize=None)
def _english_stopwords():
    """Return NLTK's English stopwords as a frozenset, loaded on first use and then reused."""
    return frozenset(stopwords.words('english'))


@lru_cache(maxsize=None)
def _wordnet_lemmatizer():
    """Return a single shared WordNetLemmatizer instance."""
    return WordNetLemmatizer()


def preprocess_text(text):
    """Normalize one review into a space-joined string of lemmatized, non-stopword tokens.

    Parameters
    ----------
    text : object
        The review text. It is coerced with ``str()`` and lower-cased before tokenizing,
        so non-string values do not raise here.

    Returns
    -------
    str
        The remaining tokens, lemmatized and joined with single spaces.
    """
    # Set membership is O(1); the previous version re-read the stopword list and
    # scanned it linearly for every single token.
    stop_words = _english_stopwords()
    lemmatize = _wordnet_lemmatizer().lemmatize
    tokens = word_tokenize(str(text).lower())
    return ' '.join(lemmatize(token) for token in tokens if token not in stop_words)

# Show which columns were loaded, then clean and preprocess the review text
print(f'Column names: {df.columns}')
review_text = df['reviewText'].fillna('')  # missing reviews become empty strings
df['reviewText'] = review_text
df['processed_review'] = review_text.apply(preprocess_text)

In [None]:
# Sentiment analysis with NLTK's VADER: one analyzer instance shared by all calls
analyzer = SentimentIntensityAnalyzer()


def get_sentiment_score(text):
    """Return the 'compound' entry of VADER's polarity scores for ``text``."""
    return analyzer.polarity_scores(text)['compound']

# Add sentiment scores to the DataFrame.
# VADER is scored on the raw review text rather than `processed_review`: its rules rely on
# negations ("not", "no", "don't"), intensifiers ("very") and capitalization, and the
# stopword-removal / lower-casing preprocessing strips exactly those cues. This also keeps
# the batch scores consistent with the interactive cell, which scores raw input.
# fillna/astype guard against missing or non-string review values.
df['sentiment_score'] = df['reviewText'].fillna('').astype(str).apply(get_sentiment_score)

# Classify sentiment based on score thresholds
def classify_sentiment(score, threshold=0.05):
    """Map a compound sentiment score to a 'Positive' / 'Negative' / 'Neutral' label.

    Parameters
    ----------
    score : float
        The sentiment score to classify.
    threshold : float, optional
        Symmetric cutoff. Scores strictly above ``threshold`` are 'Positive', scores
        strictly below ``-threshold`` are 'Negative'. Defaults to 0.05.

    Returns
    -------
    str
        'Positive', 'Negative' or 'Neutral'. Scores inside ``[-threshold, threshold]``
        (and NaN, which fails both comparisons) are 'Neutral'.
    """
    if score > threshold:
        return 'Positive'
    if score < -threshold:
        return 'Negative'
    return 'Neutral'

# Label every review from its score
df['sentiment'] = df['sentiment_score'].map(classify_sentiment)

# Report how many reviews fall into each sentiment class
print(df['sentiment'].value_counts())

In [None]:
# Bar chart of how many reviews fall into each sentiment class.
# Colors are looked up by label: value_counts() orders classes by frequency, so assigning
# colors by position could paint the wrong class (e.g. 'Negative' in green) or mismatch
# when a class is absent from the data.
SENTIMENT_COLORS = {'Positive': 'green', 'Negative': 'red', 'Neutral': 'blue'}

sentiment_counts = df['sentiment'].value_counts()
bar_colors = [SENTIMENT_COLORS.get(label, 'gray') for label in sentiment_counts.index]

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(sentiment_counts.index, sentiment_counts.values, color=bar_colors)
ax.set_title('Sentiment Distribution of Amazon Reviews')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Number of Reviews')
plt.show()


In [None]:
# Interactive demo: score sentences typed by the user until they enter 'exit'.
# NOTE: this cell blocks on input(), so it needs an interactive kernel.
while True:
    try:
        user_input = input("\nEnter a sentence (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: leave the loop cleanly
        print("Goodbye!")
        break

    # Ignore surrounding whitespace so inputs like ' exit ' still quit
    if user_input.strip().lower() == 'exit':
        print("Goodbye!")
        break

    sentiment_score = analyzer.polarity_scores(user_input)['compound']

    # Reuse the same thresholds as the batch analysis instead of duplicating them here
    sentiment = classify_sentiment(sentiment_score)

    print(f"User input:  {user_input}")
    print(f"Predicted Sentiment: {sentiment}")