###  ***Sentiment Analyzer***

In [None]:
import nltk
import random
from nltk.corpus import movie_reviews
from gensim.models import Word2Vec
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
# Downloading the movie_reviews corpus
nltk.download('movie_reviews')

# Loading movie_reviews corpus documents and labels as (tokens, category) pairs
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffling the documents with a dedicated, seeded generator: an unseeded
# shuffle would feed train_test_split a different input order on every run,
# which defeats the fixed random_state used for the splits below.
random.Random(42).shuffle(documents)

# 80/20 train/test split, then 10% of the training portion is held out
# as a validation set.
train_set, test_set = train_test_split(documents, test_size=0.2, random_state=42)
train_set, val_set = train_test_split(train_set, test_size=0.1, random_state=42)

# Creating embeddings of words using word2vec, trained on the training
# documents only so no validation/test text is used to build the vectors.
model = Word2Vec([doc for doc, _ in train_set], vector_size=100)

In [None]:
# Function to create document embeddings
def document_embedding(doc, model):
    """Return the average word vector of ``doc``.

    Only the words of ``doc`` that are present in ``model.wv`` contribute;
    each occurrence of a word is counted. Returns ``None`` when none of the
    words are present in ``model.wv`` (including when ``doc`` is empty).
    """
    # Look up the vector of every in-vocabulary word, keeping document order
    vectors = [model.wv[token] for token in doc if token in model.wv]
    if len(vectors) == 0:
        return None
    # Mean of the vectors: element-wise sum divided by the number of words used
    total = sum(vectors)
    return total / len(vectors)

# Creating feature sets and extracting labels
def _embed_labeled(dataset, model):
    """Embed every (doc, category) pair of ``dataset``.

    Returns two parallel lists ``(features, labels)``. Documents that
    couldn't be embedded (``document_embedding`` returned ``None``) are
    dropped together with their label, so ``features[i]`` always belongs
    to ``labels[i]``. Truncating the label list to the number of surviving
    embeddings instead would shift every label after a dropped document
    onto the wrong embedding.
    """
    features, labels = [], []
    for doc, category in dataset:
        embedding = document_embedding(doc, model)
        if embedding is None:
            # Skip the document AND its label to keep both lists aligned
            continue
        features.append(embedding)
        labels.append(category)
    return features, labels


X_train, y_train = _embed_labeled(train_set, model)
X_val, y_val = _embed_labeled(val_set, model)
X_test, y_test = _embed_labeled(test_set, model)



In [None]:
# Fit a support-vector classifier (default settings) on the training embeddings
svm_classifier = SVC()
svm_classifier.fit(X_train, y_train)


def _score(features, labels):
    """Predict ``features`` with the fitted classifier; return (predictions, accuracy)."""
    predictions = svm_classifier.predict(features)
    return predictions, accuracy_score(labels, predictions)


# Validation set: overall accuracy followed by the per-class report
val_predictions, accuracy_val = _score(X_val, y_val)
print(f'Validation Accuracy: {accuracy_val:.2f}')
print(classification_report(y_val, val_predictions))

# Test set: same metrics on the held-out test split
test_predictions, accuracy_test = _score(X_test, y_test)
print(f'Test Accuracy: {accuracy_test:.2f}')
print(classification_report(y_test, test_predictions))
