In [None]:
# Attach Google Drive to the Colab filesystem so the dataset CSV can be read
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import re

import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import naive_bayes
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [None]:
# Fetch the NLTK corpora used by the preprocessing step below.
# The downloader is called unconditionally; when a package is already present
# it only reports that it is up-to-date.
nltk.download('stopwords')  # stop-word lists read via nltk.corpus.stopwords
nltk.download('wordnet')  # WordNet data used by WordNetLemmatizer

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
# Step 2: Text Preprocessing
# Shared resources for preprocess_text: a set of English stop words
# (set membership checks are fast) and a single reusable WordNet lemmatizer.
english_stop_word_list = stopwords.words('english')
stop_words = set(english_stop_word_list)

lemmatizer = WordNetLemmatizer()

In [None]:
def preprocess_text(text):
    """Normalize a raw sentence before TF-IDF vectorization.

    Every non-alphabetic character is replaced by a space, the text is
    lowercased and split on whitespace, tokens found in the module-level
    ``stop_words`` set are dropped, and the remaining tokens are lemmatized
    with the module-level ``lemmatizer``.

    Args:
        text: The raw sentence as a ``str``.

    Returns:
        A single string of the cleaned tokens separated by single spaces
        (empty if no token survives).
    """
    # str.replace() treats its first argument as a literal substring, so the
    # pattern '[^a-zA-Z]' never matched and punctuation/digits were kept.
    # re.sub() applies it as a regular expression, as intended.
    text = re.sub(r'[^a-zA-Z]', ' ', text)

    # Lowercase so tokens compare equal to the lowercase stop-word entries
    text = text.lower()

    # Whitespace tokenization; runs of spaces left by the substitution collapse
    tokens = text.split()

    # Drop stop words first, then lemmatize only the tokens that are kept
    tokens = [lemmatizer.lemmatize(token) for token in tokens if token not in stop_words]

    return ' '.join(tokens)

In [None]:
# Load the labelled sentences from the mounted Drive folder and preview them
sentiment_csv_path = "/content/drive/MyDrive/ML Applications Project/CSV Files/Sentimental_Analysis.csv"
df = pd.read_csv(sentiment_csv_path)
df.head()

Unnamed: 0,Sentence,Sentiment
0,The GeoSolutions technology will leverage Bene...,positive
1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative
2,"For the last quarter of 2010 , Componenta 's n...",positive
3,According to the Finnish-Russian Chamber of Co...,neutral
4,The Swedish buyout firm has sold its remaining...,neutral


In [None]:
# Clean every entry of the 'Sentence' column with preprocess_text and store
# the result alongside the original in a new 'Preprocessed_Sentence' column
cleaned_sentences = df['Sentence'].apply(preprocess_text)
df['Preprocessed_Sentence'] = cleaned_sentences
df.head()

Unnamed: 0,Sentence,Sentiment,Preprocessed_Sentence
0,The GeoSolutions technology will leverage Bene...,positive,geosolutions technology leverage benefon 's gp...
1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative,"$esi lows, $1.50 $2.50 bk real possibility"
2,"For the last quarter of 2010 , Componenta 's n...",positive,"last quarter 2010 , componenta 's net sale dou..."
3,According to the Finnish-Russian Chamber of Co...,neutral,"according finnish-russian chamber commerce , m..."
4,The Swedish buyout firm has sold its remaining...,neutral,swedish buyout firm sold remaining 22.4 percen...


In [None]:
# Step 3: Feature Extraction
# Learn a TF-IDF vocabulary from the cleaned sentences and encode each
# sentence as a row of the feature matrix X.
# NOTE(review): the vectorizer is fitted on all rows before the train/test
# split below, so test-set term statistics influence the features; consider
# fitting on the training rows only.
cleaned_corpus = df['Preprocessed_Sentence']
tfidf_vectorizer = TfidfVectorizer()
X = tfidf_vectorizer.fit_transform(cleaned_corpus)

In [None]:
# Step 4: Train a Sentiment Classifier
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
sentiment_labels = df['Sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    sentiment_labels,
    test_size=0.2,
    random_state=42,
)

# Sanity-check the sizes of the four pieces
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Keep the held-out labels as a plain NumPy array rather than a pandas Series
y_test = y_test.to_numpy()

(4318, 10117) (4318,) (1080, 10117) (1080,)


In [None]:
# Step 5: Evaluate and Fine-tune the Model

def _fit_and_report(classifier, report_title):
    """Fit a classifier on the training split and report on the test split.

    Uses the notebook-level ``X_train``/``y_train`` for fitting and
    ``X_test``/``y_test`` for evaluation.

    Args:
        classifier: An unfitted estimator exposing ``fit`` and ``predict``.
        report_title: Heading printed above the classification report.

    Returns:
        The predictions made for ``X_test``.
    """
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    print(report_title)
    print(classification_report(y_test, predictions))
    return predictions


# Naive Bayes Classification
naive_bayes_classifier = naive_bayes.MultinomialNB()
y_pred_nbc = _fit_and_report(
    naive_bayes_classifier,
    "Naive Bayes Classification (NBC) Classification Report:",
)

# SVM Classification
svm_classifier = SVC()
y_pred_svm = _fit_and_report(
    svm_classifier,
    "Support Vector Machines (SVM) Classification Report:",
)

# Random Forest Classifier
# Seeded so the forest (and therefore its report) is the same on every run,
# matching the fixed seed already used for the train/test split.
random_forest_classifier = RandomForestClassifier(random_state=42)
y_pred_rf = _fit_and_report(
    random_forest_classifier,
    "Random Forest (RF) Classification Report:",
)

Naive Bayes Classification (NBC) Classification Report:
              precision    recall  f1-score   support

    negative       1.00      0.04      0.07       158
     neutral       0.65      0.97      0.78       590
    positive       0.68      0.42      0.52       332

    accuracy                           0.66      1080
   macro avg       0.78      0.47      0.46      1080
weighted avg       0.71      0.66      0.60      1080

Support Vector Machines (SVM) Classification Report:
              precision    recall  f1-score   support

    negative       0.21      0.07      0.10       158
     neutral       0.66      0.91      0.76       590
    positive       0.79      0.51      0.62       332

    accuracy                           0.66      1080
   macro avg       0.55      0.50      0.50      1080
weighted avg       0.63      0.66      0.62      1080

Random Forest (RF) Classification Report:
              precision    recall  f1-score   support

    negative       0.20      0.1