In [5]:
import warnings
warnings.filterwarnings('ignore')  # Ignore all warnings
import pandas as pd  # Import pandas for data manipulation
from sklearn.feature_extraction.text import TfidfVectorizer  # Import TF-IDF Vectorizer
import tkinter as tk  # Import tkinter for GUI creation
from tkinter import messagebox, ttk  # Import messagebox and ttk from tkinter
import nltk  # Import nltk for natural language processing
from nltk.sentiment import SentimentIntensityAnalyzer  # Import sentiment analyzer from nltk
import matplotlib.pyplot as plt  # Import matplotlib for plotting
import requests  # Import requests for HTTP requests
from bs4 import BeautifulSoup  # Import BeautifulSoup for parsing HTML
from tqdm import tqdm  # Import tqdm for progress bar
import datetime  # Import datetime for timestamps

# Fetch the VADER lexicon (needed by SentimentIntensityAnalyzer below)
nltk.download('vader_lexicon')

# Read the Amazon review dataset and discard incomplete rows
data = pd.read_csv('C:\\Users\\PAYAL\\Desktop\\Payal STUDY\\MCA\\MCA 4B\\AmazonReview.csv')
data = data.dropna()

# Collapse the rating in 'Sentiment' to a binary label:
# ratings above 3 become 1 (positive), everything else becomes 0 (negative).
# The mask is computed once, before either assignment overwrites the column.
positive_mask = data['Sentiment'] > 3
data.loc[~positive_mask, 'Sentiment'] = 0
data.loc[positive_mask, 'Sentiment'] = 1

# Fit a TF-IDF vocabulary (capped at 2500 terms) on the review text and
# materialise the dense feature matrix together with the label column.
# NOTE(review): X and y are not referenced again in the code visible here.
vectorizer = TfidfVectorizer(max_features=2500)
X = vectorizer.fit_transform(data['Review']).toarray()
y = data['Sentiment']

# Shared VADER analyzer used by analyze_reviews()
sia = SentimentIntensityAnalyzer()

# Function to fetch reviews from an Amazon product page
def fetch_reviews(url, retries=3, timeout=10, retry_delay=5):
    """Download a page and return the text of every review body found on it.

    The page is requested with a browser-like User-Agent. Review texts are
    taken from ``<span data-hook="review-body">`` elements and stripped of
    surrounding whitespace.

    Args:
        url: Address of the page to download.
        retries: Maximum number of download attempts.
        timeout: Seconds to wait for the server before an attempt is
            treated as failed, so a stalled connection cannot hang the GUI.
        retry_delay: Seconds to pause between two failed attempts.

    Returns:
        A list of review strings. The list is empty when the page contains
        no matching elements, or when every attempt failed (in which case
        an error dialog is shown first).
    """
    # Local import: the file's import section does not bring ``time`` into
    # scope, so the retry pause would otherwise raise NameError.
    import time

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()  # Raises HTTPError for 4xx/5xx responses
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except requests.exceptions.RequestException as req_err:
            print(f"Request error occurred: {req_err}")
        else:
            soup = BeautifulSoup(response.text, 'html.parser')
            review_elements = soup.find_all('span', {'data-hook': 'review-body'})
            return [element.get_text().strip() for element in review_elements]

        # Pause only when another attempt will follow; sleeping after the
        # last failure would just delay the error dialog.
        if attempt < retries - 1:
            time.sleep(retry_delay)

    messagebox.showinfo('Error', 'Failed to fetch reviews after several attempts.')
    return []

# Build the top-level window: title bar text, fixed 600x500 size, white background
root = tk.Tk()
root.title('Sentiment Analysis')
root.geometry('600x500')
root.configure(bg='white')

# Heading shown at the top of the window
title_label = tk.Label(
    root,
    text='Sentiment Analysis of Amazon Reviews',
    font=('Arial', 24, 'bold'),
    bg='white',
    fg='blue',
)
title_label.pack(pady=20)

# Helper: map a VADER compound score to a sentiment label
def _label_for_compound(compound_score):
    """Return 'Positive', 'Negative' or 'Neutral' for a compound score.

    Scores of 0.05 and above are positive, -0.05 and below are negative,
    and anything in between is neutral.
    """
    if compound_score >= 0.05:
        return 'Positive'
    if compound_score <= -0.05:
        return 'Negative'
    return 'Neutral'


# Helper: draw the pie chart, bar chart and per-review score lines
def _show_sentiment_charts(labels, sizes, sentiment_scores):
    """Display three matplotlib figures, one after another.

    Args:
        labels: Sentiment label names, in display order.
        sizes: Count for each label, in the same order as ``labels``.
        sentiment_scores: One score dict per review, as returned by
            ``sia.polarity_scores``.
    """
    colors = ['#00cc00', '#cc0000', '#cccc00']
    explode = (0.1, 0.1, 0.1)

    # Pie chart for sentiment distribution
    plt.figure(figsize=(6, 6))
    plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140, shadow=True)
    plt.title('Sentiment Distribution', fontdict={'fontsize': 18})
    plt.legend(labels, loc="best")
    plt.axis('equal')
    plt.show()

    # Bar graph for sentiment distribution
    plt.figure(figsize=(8, 6))
    plt.bar(labels, sizes, color=colors)
    plt.title('Sentiment Distribution', fontdict={'fontsize': 18})
    plt.xlabel('Sentiment', fontsize=14)
    plt.ylabel('Count', fontsize=14)
    plt.show()

    # Line plot with one line per score component, one x position per review
    sentiment_scores_df = pd.DataFrame(sentiment_scores)
    sentiment_scores_df.plot(kind='line', figsize=(10, 6))
    plt.title('Sentiment Scores', fontdict={'fontsize': 18})
    plt.xlabel('Review', fontsize=14)
    plt.ylabel('Score', fontsize=14)
    plt.legend()
    plt.show()


# Function to analyze the reviews
def analyze_reviews():
    """Score the entered or fetched reviews and show counts and charts.

    Reviews come from exactly one source: the review text box (one review
    per non-blank line) or the page at the URL in the URL field. If both or
    neither are filled in, an error dialog is shown and nothing is analysed.
    Each review is scored with the module-level VADER analyzer ``sia``; the
    per-label counts are written to ``result_label`` and three charts are
    displayed.
    """
    # Get the reviews entered by the user, ignoring blank lines
    typed_lines = review_entry.get('1.0', 'end-1c').split('\n')
    reviews = [line.strip() for line in typed_lines if line.strip()]

    # Strip the URL so a field holding only whitespace counts as empty
    # instead of being sent to fetch_reviews as a bogus address.
    url = url_entry.get().strip()

    # Check if both reviews and URL are provided
    if reviews and url:
        messagebox.showinfo('Error', 'Please enter reviews in the review input field OR provide the URL in the URL input field, not both.')
        return

    # Check if neither reviews nor URL are provided
    if not reviews and not url:
        messagebox.showinfo('Error', 'Please enter at least one review in the review input field OR provide the URL in the URL input field.')
        return

    # Fetch reviews from URL if no reviews are provided
    if not reviews:
        with tqdm(total=1, desc='Fetching Reviews', unit='page', ncols=80, bar_format='{l_bar}{bar}') as pbar:
            reviews = fetch_reviews(url)
            pbar.update(1)  # Update progress bar after fetching reviews

        # Show error if no reviews are found
        if not reviews:
            messagebox.showinfo('Error', 'No reviews found on the provided URL.')
            return

    # Score every review, then label it from its compound score
    sentiment_scores = [sia.polarity_scores(review) for review in reviews]
    sentiments = [_label_for_compound(scores['compound']) for scores in sentiment_scores]

    # Count the number of each sentiment
    labels = ['Positive', 'Negative', 'Neutral']
    positive_count, negative_count, neutral_count = (sentiments.count(label) for label in labels)

    # Display the sentiment results
    result_label.config(text=f'Positive: {positive_count}\nNegative: {negative_count}\nNeutral: {neutral_count}', font=('Arial', 18, 'bold'), fg='black')

    _show_sentiment_charts(labels, [positive_count, negative_count, neutral_count], sentiment_scores)

# Function to save the sentiment analysis results to a file
def save_results():
    """Write the review text box contents and the result summary to a text file.

    The file is created in the current working directory and is named
    ``sentiment_results_<YYYY-MM-DD_HH-MM-SS>.txt`` from the current local
    time. A dialog reports either the file name on success or the error
    when the file could not be written.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"sentiment_results_{timestamp}.txt"

    # NOTE(review): only the text box is saved; reviews fetched from a URL
    # are not kept in the box, so this section is empty in that case.
    reviews = review_entry.get('1.0', 'end-1c')
    sentiment_results = result_label.cget("text")
    save_text = f"Reviews:\n{reviews}\n\nSentiment Results:\n{sentiment_results}"

    try:
        # Explicit UTF-8: review text may contain characters (emoji, accents)
        # that the platform default encoding cannot represent.
        with open(file_name, 'w', encoding='utf-8') as file:
            file.write(save_text)
    except OSError as err:
        messagebox.showerror('Save', f'Could not save results: {err}')
        return

    messagebox.showinfo('Save', f'Results saved to file: {file_name}')

# Function to reset the form to its initial, empty state
def clear_input():
    """Blank the result summary, the URL field and the review text box."""
    result_label.configure(text='')
    url_entry.delete(0, 'end')
    review_entry.delete('1.0', 'end')

# --- Review input: caption plus a multi-line text box -----------------------
review_label = tk.Label(
    root,
    text='Enter reviews (one per line):',
    font=('Arial', 16),
    bg='white',
    fg='black',
)
review_label.pack()

review_entry = tk.Text(
    root,
    height=6,
    width=50,
    font=('Arial', 14),
    bd=2,
    relief='solid',
)
review_entry.pack(pady=10)

# --- URL input: caption plus a single-line entry ----------------------------
url_heading = tk.Label(
    root,
    text='Enter Amazon Review URL:',
    font=('Arial', 16),
    bg='white',
    fg='black',
)
url_heading.pack()

url_entry = tk.Entry(
    root,
    font=('Arial', 14),
    bd=2,
    relief='solid',
    width=50,
)
url_entry.pack(pady=10)

# --- Action buttons, laid out side by side in their own frame ---------------
button_frame = tk.Frame(root, bg='white')
button_frame.pack()

analyze_button = tk.Button(
    button_frame,
    text='Analyze',
    font=('Arial', 16, 'bold'),
    bg='blue',
    fg='white',
    command=analyze_reviews,
)
analyze_button.grid(row=0, column=0, padx=10, pady=10)

save_button = tk.Button(
    button_frame,
    text='Save',
    font=('Arial', 16, 'bold'),
    bg='green',
    fg='white',
    command=save_results,
)
save_button.grid(row=0, column=1, padx=10, pady=10)

clear_button = tk.Button(
    button_frame,
    text='Clear',
    font=('Arial', 16, 'bold'),
    bg='red',
    fg='white',
    command=clear_input,
)
clear_button.grid(row=0, column=2, padx=10, pady=10)

# --- Result summary, filled in by analyze_reviews() -------------------------
result_label = tk.Label(
    root,
    text='',
    font=('Arial', 18, 'bold'),
    bg='white',
    fg='black',
)
result_label.pack(pady=20)

# Hand control to tkinter; this call blocks until the window is closed
root.mainloop()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\PAYAL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
Fetching Reviews: 100%|█████████████████████████████████████████████████████████
