<a href="https://colab.research.google.com/github/youness-marrakchi/NLP-Sentiment-Analysis/blob/main/NLP_Sentiment_anls_MK1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Toy corpus: five short movie reviews, each paired with a hand-assigned label
sample_reviews = [
    "I loved this movie, it was fantastic!",
    "The movie was okay, but not great.",
    "I did not like this movie at all. It was terrible.",
    "What a wonderful film! A must-watch.",
    "Boring and dull, wouldn't recommend."
]
sample_labels = ['positive', 'neutral', 'negative', 'positive', 'negative']

# Column-oriented mapping; the i-th review belongs to the i-th label
data = {'review': sample_reviews, 'sentiment': sample_labels}

# One row per review, with columns 'review' and 'sentiment'
df = pd.DataFrame(data)

# Function to preprocess the text (cleaning the review)
def preprocess_text(text):
    """Return a lower-cased copy of ``text`` with punctuation stripped.

    Every character that is neither a word character (``\\w``: letters,
    digits, underscore) nor whitespace is deleted outright rather than
    replaced by a space, so hyphenated or contracted words are fused
    together (e.g. "must-watch" becomes "mustwatch").

    Parameters:
        text: the raw review string.

    Returns:
        The normalised string.
    """
    return re.sub(r'[^\w\s]', '', text.lower())

# Normalise every review once and store the result next to the raw text
df['cleaned_review'] = df['review'].map(preprocess_text)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

# Example lexicon with words and their sentiment scores
# Word -> polarity score (+1 positive, 0 neutral, -1 negative).
# Lookups are done on whole tokens with an exact match, so inflected forms
# (e.g. 'loved') do not hit their base entry (e.g. 'love').
lexicon = {
    **dict.fromkeys(('love', 'fantastic', 'great', 'wonderful', 'mustwatch'), 1),
    'okay': 0,
    **dict.fromkeys(('boring', 'dull', 'terrible', 'not'), -1),
}

# Function to analyze sentiment of a given review using the lexicon
def analyze_sentiment(review, lexicon):
    """Classify a review as 'positive', 'negative' or 'neutral'.

    The review is split on whitespace and the lexicon score of each token
    is added up; tokens missing from the lexicon contribute 0. The sign of
    the total decides the label.

    Parameters:
        review: the (already cleaned) review text.
        lexicon: mapping from token to numeric sentiment score.

    Returns:
        'positive' if the total is above zero, 'negative' if it is below
        zero, otherwise 'neutral'.
    """
    total = 0
    for token in review.split():
        total += lexicon.get(token, 0)

    if total > 0:
        return 'positive'
    if total < 0:
        return 'negative'
    return 'neutral'

# test_df is a subset of df produced by train_test_split. Take an explicit copy
# before adding a column so the write goes to an independent frame and pandas
# does not emit SettingWithCopyWarning.
test_df = test_df.copy()

# Score every cleaned review in the test set with the lexicon-based classifier
test_df['predicted_sentiment'] = test_df['cleaned_review'].apply(analyze_sentiment, lexicon=lexicon)

# Show raw text, cleaned text, true label and prediction side by side
print(test_df[['review', 'cleaned_review', 'sentiment', 'predicted_sentiment']])

# Accuracy of the sentiment predictions
accuracy = accuracy_score(test_df['sentiment'], test_df['predicted_sentiment'])
print(f"Accuracy: {accuracy:.2f}")

# Classification report to show detailed evaluation metrics
print(classification_report(test_df['sentiment'], test_df['predicted_sentiment']))


                               review                    cleaned_review  \
1  The movie was okay, but not great.  the movie was okay but not great   

  sentiment predicted_sentiment  
1   neutral             neutral  
Accuracy: 1.00
              precision    recall  f1-score   support

     neutral       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

