In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

In [None]:
# Download necessary NLTK data
# 'stopwords' is the corpus read by stopwords.words('english') in preprocess_text.
nltk.download('stopwords')
# 'wordnet' is fetched for the WordNetLemmatizer that preprocess_text instantiates.
nltk.download('wordnet')

In [None]:
# Preprocessing function for text data
# Matches every character that is neither an ASCII letter nor whitespace.
# Both letter cases are listed explicitly, so no regex flags are required.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

# Cache for the English stop words. It is filled lazily on first use so that
# defining this cell does not require the NLTK corpus to be available yet.
_ENGLISH_STOP_WORDS = None


def _english_stop_words():
    """Return NLTK's English stop words as a cached frozenset."""
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOP_WORDS


def preprocess_text(text):
    """Normalise one piece of raw text for vectorisation.

    Steps, in order: delete every character that is not an ASCII letter or
    whitespace, lowercase, split on whitespace, drop English stop words and
    lemmatise each remaining token with ``WordNetLemmatizer``.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example ``None`` or the float
        NaN that pandas uses for empty CSV fields) is treated as empty text.

    Returns
    -------
    str
        The surviving tokens joined by single spaces; ``''`` when nothing
        survives or when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ''

    # re.sub's fourth positional argument is ``count``, not ``flags``. Passing
    # re.I | re.A there capped the number of removed characters at 258 and
    # never applied the flags. The compiled pattern removes every match.
    letters_only = _NON_LETTER_RE.sub('', text).lower()

    # Set membership is O(1); the stop-word list is no longer re-read from the
    # corpus for every single token.
    stop_words = _english_stop_words()
    lemmatizer = WordNetLemmatizer()
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in letters_only.split()
        if word not in stop_words
    )


In [None]:
# Load and preprocess dataset
def load_and_preprocess_data(file_path):
    """Read the reviews CSV and return cleaned texts with their labels.

    Rows that lack either a review text or a sentiment label are dropped:
    pandas reads empty CSV fields as NaN, which would otherwise break text
    preprocessing or put missing values into the label vector.

    Parameters
    ----------
    file_path : str, path object or file-like object
        Anything ``pandas.read_csv`` accepts. The data must contain the
        columns ``ReviewText`` and ``Sentiment``.

    Returns
    -------
    X : pandas.Series
        Output of ``preprocess_text`` for every kept review, named
        ``'ProcessedText'``.
    y : pandas.Series
        The ``Sentiment`` column for the same rows (assumed by the original
        author to be binary, 0: negative, 1: positive -- not checked here).

    Raises
    ------
    KeyError
        If ``ReviewText`` or ``Sentiment`` is not a column of the CSV.
    """
    df = pd.read_csv(file_path)

    # Keep only complete rows and renumber them so X and y share a clean index.
    complete = df.dropna(subset=['ReviewText', 'Sentiment']).reset_index(drop=True)

    # astype(str) guards against a review column that pandas parsed as numeric.
    X = complete['ReviewText'].astype(str).apply(preprocess_text).rename('ProcessedText')
    y = complete['Sentiment']
    return X, y

In [None]:
# Main function to run sentiment analysis
def run_sentiment_analysis(file_path, test_size=0.2, random_state=42):
    """Train and evaluate a TF-IDF + logistic-regression sentiment classifier.

    Loads the data with ``load_and_preprocess_data``, holds out a test split,
    fits the pipeline on the remainder and prints the accuracy and the
    classification report for the held-out rows.

    Parameters
    ----------
    file_path : str
        Location of the reviews CSV, forwarded to ``load_and_preprocess_data``.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; the default reproduces the
        previously hard-coded 80/20 split.
    random_state : int, default 42
        Seed used for both the train/test split and the classifier.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline, so it can be reused for further predictions
        instead of being discarded after the report is printed.
    """
    X, y = load_and_preprocess_data(file_path)

    # Splitting the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Creating a pipeline for TF-IDF Vectorization and Logistic Regression
    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('classifier', LogisticRegression(random_state=random_state)),
    ])

    # Train the model
    pipeline.fit(X_train, y_train)

    # Predictions
    predictions = pipeline.predict(X_test)

    # Evaluation
    print("Accuracy Score:", accuracy_score(y_test, predictions))
    print("Classification Report:")
    print(classification_report(y_test, predictions))

    return pipeline


In [None]:
# Run the full analysis only when this code executes as the main program.
if __name__ == "__main__":
    # CSV path, resolved against the current working directory; the file must
    # provide the 'ReviewText' and 'Sentiment' columns read by the loader.
    file_path = "reviews_data.csv" 
    run_sentiment_analysis(file_path)