In [4]:

import spacy
from spacy import displacy
import pandas as pd
import random
import requests

# Initialize spaCy model
nlp = spacy.load('en_core_web_sm')

# Function to load Amazon reviews dataset
def load_amazon_reviews():
    """
    Load Amazon reviews dataset from a public source
    Returns: list of review texts
    """
    try:
        # URL to Amazon product reviews dataset
        # Using a public dataset from Kaggle
        url = "https://raw.githubusercontent.com/bittlingmayer/AmazonReviews/master/data/Books.txt"

        # Read the dataset
        df = pd.read_csv(url, sep='\t', error_bad_lines=False)

        # Get a random sample of 5 reviews
        sample_reviews = df['reviewText'].dropna().sample(5, random_state=42).tolist()

        print(f"\nLoaded {len(sample_reviews)} reviews from the dataset")
        return sample_reviews
    except Exception as e:
        print(f"Error loading dataset: {e}")
        print("\nUsing sample reviews instead:")
        return [
            "I love my new Apple iPhone 14 Pro Max. It's amazing! The camera quality is outstanding.",
            "The Samsung Galaxy S23 is terrible. Battery life is horrible and it overheats constantly.",
            "Just bought a Dell XPS 15 laptop. Excellent performance and build quality.",
            "Sony's new WH-1000XM5 headphones are fantastic. Great sound quality and battery life.",
            "Terrible experience with HP printer. Keeps jamming and prints poorly."
        ]

# Load reviews from dataset
reviews = load_amazon_reviews()

def extract_entities(review):
    """
    Extract named entities from a review using spaCy's NER

    Args:
        review (str): The review text to analyze

    Returns:
        tuple: (product_entities, other_entities)
    """
    doc = nlp(review)
    product_entities = []
    other_entities = []

    for ent in doc.ents:
        if ent.label_ in ['ORG', 'PRODUCT']:
            product_entities.append((ent.text, ent.label_))
        else:
            other_entities.append((ent.text, ent.label_))

    return product_entities, other_entities

def analyze_sentiment(review):
    """
    Analyze review sentiment using rule-based approach

    Args:
        review (str): The review text to analyze

    Returns:
        str: 'positive', 'negative', or 'neutral'
    """
    # List of positive and negative words
    positive_words = ['love', 'amazing', 'excellent', 'fantastic', 'great', 'good', 'best']
    negative_words = ['terrible', 'horrible', 'poor', 'bad', 'worst', 'awful', 'disappointed']

    # Convert review to lowercase
    review_lower = review.lower()

    # Count positive and negative words
    pos_count = sum(1 for word in positive_words if word in review_lower)
    neg_count = sum(1 for word in negative_words if word in review_lower)

    # Determine sentiment
    if pos_count > neg_count:
        return 'positive'
    elif neg_count > pos_count:
        return 'negative'
    return 'neutral'

def analyze_reviews():
    """
    Analyze all reviews for entities and sentiment
    """
    print("\n=== Amazon Product Reviews Analysis ===\n")

    for i, review in enumerate(reviews, 1):
        print(f"\nReview {i}:")
        print("-" * 50)
        print(f"Original Review: {review}")

        # Extract entities
        product_entities, other_entities = extract_entities(review)

        # Display extracted entities
        print("\nExtracted Entities:")
        print("Product/Brand Entities:")
        for entity, label in product_entities:
            print(f"- {entity} ({label})")

        print("\nOther Entities:")
        for entity, label in other_entities:
            print(f"- {entity} ({label})")

        # Analyze sentiment
        sentiment = analyze_sentiment(review)
        print(f"\nSentiment: {sentiment.upper()}")

if __name__ == "__main__":
    analyze_reviews()

Error loading dataset: read_csv() got an unexpected keyword argument 'error_bad_lines'

Using sample reviews instead:

=== Amazon Product Reviews Analysis ===


Review 1:
--------------------------------------------------
Original Review: I love my new Apple iPhone 14 Pro Max. It's amazing! The camera quality is outstanding.

Extracted Entities:
Product/Brand Entities:
- Apple (ORG)

Other Entities:
- 14 (CARDINAL)

Sentiment: POSITIVE

Review 2:
--------------------------------------------------
Original Review: The Samsung Galaxy S23 is terrible. Battery life is horrible and it overheats constantly.

Extracted Entities:
Product/Brand Entities:
- S23 (ORG)

Other Entities:

Sentiment: NEGATIVE

Review 3:
--------------------------------------------------
Original Review: Just bought a Dell XPS 15 laptop. Excellent performance and build quality.

Extracted Entities:
Product/Brand Entities:

Other Entities:
- 15 (CARDINAL)

Sentiment: POSITIVE

Review 4:
--------------------------------