<a href="https://colab.research.google.com/github/vkamole/iris-dataset-sklearn/blob/main/nlp_spacy_ner_sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Create data folder and sample review file
import os

# os.makedirs replaces the notebook-only `!mkdir -p data` shell escape, so the
# cell also runs as plain Python and does not depend on a POSIX `mkdir`.
# exist_ok=True keeps the cell re-runnable when the folder already exists.
os.makedirs("data", exist_ok=True)

# One review per line; the last line is written without a trailing newline.
with open("data/sample_reviews.txt", "w") as f:
    f.write("""I love the battery life of the Samsung Galaxy S21. Highly recommended!
The Apple AirPods Max are overpriced and uncomfortable.
I recently bought the Sony WH-1000XM5 and the sound quality is amazing.
The Lenovo ThinkPad is decent, but the display isn't sharp.
Avoid the cheap knockoffs of JBL speakers. Poor sound quality.""")


In [5]:
# Load the sample reviews back as a list with one entry per line
# (line endings are kept), then show the raw list.
with open("data/sample_reviews.txt") as review_file:
    reviews = list(review_file)

print(reviews)


['I love the battery life of the Samsung Galaxy S21. Highly recommended!\n', 'The Apple AirPods Max are overpriced and uncomfortable.\n', 'I recently bought the Sony WH-1000XM5 and the sound quality is amazing.\n', "The Lenovo ThinkPad is decent, but the display isn't sharp.\n", 'Avoid the cheap knockoffs of JBL speakers. Poor sound quality.']


In [6]:
# spaCy NER + Rule-based Sentiment on Product Reviews

import spacy
import re

# Load the spaCy English pipeline used for entity recognition below
nlp = spacy.load("en_core_web_sm")

# Read the reviews into a list, one entry per line of the file
with open("data/sample_reviews.txt") as review_file:
    reviews = list(review_file)

# Keywords for rule-based sentiment
positive_keywords = ["love", "recommended", "amazing", "great", "sharp"]
negative_keywords = ["overpriced", "uncomfortable", "poor", "avoid", "isn't"]


def _has_keyword(text, keywords):
    """Return True if any keyword occurs at the start of a word in *text*.

    The negative lookbehind rejects hits in the middle of a word, so
    "gloves" no longer counts as "love" and "unavoidable" no longer counts
    as "avoid". The end of the keyword is left open so inflected forms such
    as "loved" or "avoided" still match.
    """
    return any(
        re.search(r"(?<!\w)" + re.escape(word), text) for word in keywords
    )


def analyze_sentiment(text):
    """Classify *text* with a keyword lookup.

    The text is lowercased and compared against the module-level
    ``positive_keywords`` and ``negative_keywords`` lists, which are read
    at call time.

    Args:
        text: The review text to classify.

    Returns:
        "Positive" if only positive keywords are found, "Negative" if only
        negative keywords are found, "Mixed" if both are found, and
        "Neutral" if neither is found (including for empty text).
    """
    # Normalise the typographic apostrophe so "isn’t" matches the "isn't" keyword.
    text = text.lower().replace("\u2019", "'")
    pos = _has_keyword(text, positive_keywords)
    neg = _has_keyword(text, negative_keywords)
    if pos and not neg:
        return "Positive"
    elif neg and not pos:
        return "Negative"
    elif pos and neg:
        return "Mixed"
    else:
        return "Neutral"

# Run every review through the spaCy pipeline and the keyword classifier,
# printing the review text, its named entities and its sentiment label.
for raw_review in reviews:
    parsed = nlp(raw_review)
    print(f"\nReview: {raw_review.strip()}")

    # One line per entity found by spaCy: the entity text and its label
    print("Named Entities:")
    entity_lines = [f" - {ent.text} ({ent.label_})" for ent in parsed.ents]
    for entity_line in entity_lines:
        print(entity_line)

    # Sentiment is computed from the raw line, not from the spaCy Doc
    print(f"Sentiment: {analyze_sentiment(raw_review)}")



Review: I love the battery life of the Samsung Galaxy S21. Highly recommended!
Named Entities:
 - the Samsung Galaxy S21 (ORG)
Sentiment: Positive

Review: The Apple AirPods Max are overpriced and uncomfortable.
Named Entities:
Sentiment: Negative

Review: I recently bought the Sony WH-1000XM5 and the sound quality is amazing.
Named Entities:
 - Sony (ORG)
Sentiment: Positive

Review: The Lenovo ThinkPad is decent, but the display isn't sharp.
Named Entities:
 - Lenovo (ORG)
Sentiment: Mixed

Review: Avoid the cheap knockoffs of JBL speakers. Poor sound quality.
Named Entities:
 - JBL (ORG)
Sentiment: Negative
