<a href="https://colab.research.google.com/github/yuthika21z/yuthika21z/blob/main/sentiment_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk

# nltk.word_tokenize (used later by predict_sentiment) needs the Punkt sentence
# tokenizer data. Older NLTK releases load it from the 'punkt' package, while
# recent releases look it up under 'punkt_tab' and raise LookupError if only
# 'punkt' is present. Requesting both keeps the notebook runnable on either;
# a release that does not know 'punkt_tab' just reports it as unavailable.
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import nltk
import random
from nltk.corpus import movie_reviews
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Step 1: Prepare the data
# Each document is a (list of tokens, category) pair, one per corpus file.
nltk.download('movie_reviews')
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]
# Shuffle with a dedicated, seeded generator so the train/test split (and hence
# the reported accuracies) is identical on every Restart & Run All, without
# touching the global `random` state.
random.Random(42).shuffle(documents)

# Step 2: Preprocess the data and extract features
all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())
# FreqDist is a Counter, so .keys() is in first-seen order, not frequency order;
# most_common() is what actually yields the 5000 most frequent words.
word_features = [word for word, _count in all_words.most_common(5000)]

def document_features(document, vocabulary=None):
    """Build a word-presence feature dict for one tokenized document.

    Args:
        document: Iterable of token strings (any casing).
        vocabulary: Optional iterable of lowercase words to use as feature
            keys. Defaults to the module-level ``word_features``.

    Returns:
        dict mapping each vocabulary word to True if it occurs in ``document``
        (compared case-insensitively on the document side), otherwise False.
    """
    if vocabulary is None:
        vocabulary = word_features
    # The vocabulary is built from lowercased corpus words, so tokens must be
    # lowercased too; otherwise capitalised input such as "Film" never matches.
    document_words = {token.lower() for token in document}
    return {word: (word in document_words) for word in vocabulary}

# Step 3: Convert every (tokens, label) pair into a (feature dict, label) pair,
# then keep the first 1500 shuffled examples for training and the rest for testing
featuresets = [(document_features(tokens), label) for tokens, label in documents]
train_set = featuresets[:1500]
test_set = featuresets[1500:]

# Step 4: Fit multinomial Naive Bayes through NLTK's scikit-learn wrapper
# (train() hands back the wrapper itself, so construction and fitting can be chained)
classifier = SklearnClassifier(MultinomialNB()).train(train_set)

# Step 5: Score Naive Bayes on the held-out examples
test_features, test_labels = zip(*test_set)
predictions = classifier.classify_many(test_features)
accuracy = accuracy_score(test_labels, predictions)
print("Naive Bayes Classifier Accuracy:", accuracy)

# Step 6: Fit a linear-kernel SVM on the same training split
svm_classifier = SklearnClassifier(SVC(kernel='linear')).train(train_set)

# Step 7: Score the SVM on the same held-out examples
svm_predictions = svm_classifier.classify_many(test_features)
svm_accuracy = accuracy_score(test_labels, svm_predictions)
print("SVM Classifier Accuracy:", svm_accuracy)

# Step 8: Make predictions
def predict_sentiment(text, classifier):
    """Label a raw review string as "Positive" or "Negative".

    The text is tokenized with nltk.word_tokenize, turned into a feature dict
    by document_features, and passed to ``classifier.classify``. A predicted
    label of 'pos' maps to "Positive"; any other label maps to "Negative".
    """
    tokens = nltk.word_tokenize(text)
    predicted_label = classifier.classify(document_features(tokens))
    if predicted_label == 'pos':
        return "Positive"
    return "Negative"

# Example usage: run one hand-written review through both trained models
# (Naive Bayes first, then the SVM) and report each verdict.
sample_review = "This Film is not only good but also flawless"
nb_sentiment, svm_sentiment = (
    predict_sentiment(sample_review, model)
    for model in (classifier, svm_classifier)
)
print("Naive Bayes Predicted Sentiment:", nb_sentiment)
print("SVM Predicted Sentiment:", svm_sentiment)


[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


Naive Bayes Classifier Accuracy: 0.852
SVM Classifier Accuracy: 0.826
Naive Bayes Predicted Sentiment: Positive
SVM Predicted Sentiment: Negative


**Perform Prediction**

In [None]:
# predict_sentiment is already defined in the training cell above. An identical
# second definition here would silently shadow it and let the two copies drift
# apart, so this cell only calls the existing function.

# Example usage: classify one hand-written review with both trained models.
sample_review = "This Film is not only good but also flawless"
nb_sentiment = predict_sentiment(sample_review, classifier)
svm_sentiment = predict_sentiment(sample_review, svm_classifier)
print("Naive Bayes Predicted Sentiment:", nb_sentiment)
print("SVM Predicted Sentiment:", svm_sentiment)

Naive Bayes Predicted Sentiment: Positive
SVM Predicted Sentiment: Negative
