<a href="https://colab.research.google.com/github/krishnabalajiwork/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from textblob import TextBlob
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Read the comments CSV from the Colab filesystem into a DataFrame
file_path = '/content/bts_2021_1.csv'
data = pd.read_csv(file_path)

# Preview the first rows, then report how many values are missing per column
print(data.head(), data.isnull().sum(), sep='\n')

# Keep only the rows that actually carry a comment_text value
data = data.dropna(axis=0, subset=['comment_text'])

# Function to calculate polarity and subjectivity
def get_sentiment(text):
    """Score *text* with TextBlob.

    Args:
        text: The comment to analyse.

    Returns:
        A ``(polarity, subjectivity)`` tuple taken from
        ``TextBlob(text).sentiment``.
    """
    scores = TextBlob(text).sentiment
    return scores.polarity, scores.subjectivity

# Apply sentiment calculation.
# Score every comment exactly once and assign plain lists: wrapping each
# result in a pd.Series inside .apply() allocates a Series per row, which is
# far slower on large frames. List assignment also works when `data` has no
# rows left after the dropna above.
sentiment_scores = [get_sentiment(text) for text in data['comment_text']]
data['polarity'] = [polarity for polarity, _ in sentiment_scores]
data['subjectivity'] = [subjectivity for _, subjectivity in sentiment_scores]

# Categorize sentiment based on polarity
def categorize_sentiment(polarity):
    """Translate a polarity score into a sentiment label.

    Args:
        polarity: Numeric polarity score.

    Returns:
        'Positive' when the score is above zero, 'Neutral' when it is
        exactly zero, and 'Negative' in every other case.
    """
    if polarity > 0:
        return 'Positive'
    return 'Neutral' if polarity == 0 else 'Negative'

# Derive the categorical sentiment label from each row's polarity
data['sentiment'] = data['polarity'].apply(categorize_sentiment)

# Encode sentiment labels as integers; le.classes_ keeps the label names
le = LabelEncoder()
data['sentiment_encoded'] = le.fit_transform(data['sentiment'])

# Vectorize comment_text into token counts, ignoring English stop words
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(data['comment_text'])

# Target variable
y = data['sentiment_encoded']

# Split into train and test sets (80/20); fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression model
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

# Evaluate model.
# The full set of encoded labels is passed explicitly: without `labels`, a
# class that appears in neither y_test nor y_pred makes classification_report
# raise (label count != len(target_names)) and changes the shape of the
# confusion matrix, so its rows/columns would no longer line up with le.classes_.
y_pred = log_reg.predict(X_test)
class_labels = le.transform(le.classes_)
print("Classification Report:\n", classification_report(y_test, y_pred, labels=class_labels, target_names=le.classes_))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_labels))


# Function to predict sentiment, polarity, and subjectivity for new comments
def predict_sentiment_with_polarity_subjectivity(new_comments):
    """Predict a sentiment label and TextBlob scores for each new comment.

    The label comes from the fitted ``vectorizer`` / ``log_reg`` / ``le``
    objects defined at module level; polarity and subjectivity come from
    ``get_sentiment``.

    Args:
        new_comments: A single comment string, or an iterable of comment
            strings. One-shot iterables such as generators are accepted.

    Returns:
        A list with one dict per comment, in input order, holding the keys
        'Comment', 'Predicted Sentiment', 'Polarity' and 'Subjectivity'.
        An empty input yields an empty list.
    """
    # A bare string is not a collection of documents; treat it as one comment
    # instead of letting the vectorizer reject it.
    if isinstance(new_comments, str):
        new_comments = [new_comments]

    # Materialize once: the comments are traversed several times below, and a
    # generator would be exhausted after the vectorizer's first pass.
    comments = list(new_comments)
    if not comments:
        # The classifier cannot predict on zero samples, so short-circuit.
        return []

    features = vectorizer.transform(comments)
    predicted_sentiments = le.inverse_transform(log_reg.predict(features))

    results = []
    for comment, sentiment in zip(comments, predicted_sentiments):
        polarity, subjectivity = get_sentiment(comment)
        results.append({
            'Comment': comment,
            'Predicted Sentiment': sentiment,
            'Polarity': polarity,
            'Subjectivity': subjectivity,
        })
    return results


# Sample comments used to sanity-check the trained model
new_comments = [
    "I love you",
    "I didn't like this video at all, but it wasn't a waste of time",
    "I hate you",
]

predicted_results = predict_sentiment_with_polarity_subjectivity(new_comments)

# Print one four-line report per comment, followed by a blank line
for result in predicted_results:
    print(
        f"Comment: '{result['Comment']}'",
        f"Predicted Sentiment: {result['Predicted Sentiment']}",
        f"Polarity: {result['Polarity']}",
        f"Subjectivity: {result['Subjectivity']}\n",
        sep="\n",
    )


  query                                          url  \
0   bts  https://www.youtube.com/watch?v=S8GpX3SAeig   
1   bts  https://www.youtube.com/watch?v=S8GpX3SAeig   
2   bts  https://www.youtube.com/watch?v=S8GpX3SAeig   
3   bts  https://www.youtube.com/watch?v=S8GpX3SAeig   
4   bts  https://www.youtube.com/watch?v=S8GpX3SAeig   

                                               title           upload_date  \
0  5 Hour BTS Piano Playlist | Study & Relax with...  2021-01-01T10:58:00Z   
1  5 Hour BTS Piano Playlist | Study & Relax with...  2021-01-01T10:58:00Z   
2  5 Hour BTS Piano Playlist | Study & Relax with...  2021-01-01T10:58:00Z   
3  5 Hour BTS Piano Playlist | Study & Relax with...  2021-01-01T10:58:00Z   
4  5 Hour BTS Piano Playlist | Study & Relax with...  2021-01-01T10:58:00Z   

    channel    views   likes  dislikes  comment_count  \
0  DooPiano  2444982  119269       501           3224   
1  DooPiano  2444982  119269       501           3224   
2  DooPiano  2444982  1