# 🐦 Twitter Sentiment Analysis
Using a RoBERTa model with a Gradio interface

## 🎯 Objective
Classify the sentiment of tweets as Negative, Neutral, or Positive using a fine-tuned RoBERTa model.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

In [None]:
# Label names, positioned to match the index of each model output score.
labels = ["Negative", "Neutral", "Positive"]

# Checkpoint identifier handed to both from_pretrained() calls below.
roberta = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(roberta)
model = AutoModelForSequenceClassification.from_pretrained(roberta)

In [None]:
def preprocess_tweet(tweet):
    """Mask user mentions and links in a tweet with placeholder tokens.

    The text is split on whitespace. A token that starts with ``@`` and has
    at least one more character becomes ``@user``; any other token that
    starts with ``http`` becomes ``http``. The tokens are then joined back
    together with single spaces, so runs of whitespace collapse.

    Args:
        tweet: Tweet text to normalise.

    Returns:
        The normalised text as a single string.
    """

    def _mask(token):
        # A bare "@" is left untouched; only "@something" counts as a mention.
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        return 'http' if token.startswith('http') else token

    return ' '.join(_mask(token) for token in tweet.split())

def classify_sentiment(tweet):
    """Return the predicted sentiment label for a single tweet.

    The tweet is normalised with ``preprocess_tweet``, tokenised with the
    module-level ``tokenizer``, and passed through the module-level
    ``model``. The first output row is turned into scores with ``softmax``
    and the label at the highest-scoring index is returned.

    Args:
        tweet: Raw tweet text.

    Returns:
        The entry of ``labels`` at the index of the highest score.
    """
    # Local import: the notebook's import cell does not load torch, although
    # return_tensors='pt' already depends on it being installed.
    import torch

    tweet_proc = preprocess_tweet(tweet)
    # Truncate over-long input instead of letting the model raise on it.
    # NOTE(review): 512 is the usual RoBERTa-base token limit - confirm
    # against model.config.max_position_embeddings for this checkpoint.
    encoded_tweet = tokenizer(
        tweet_proc,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Inference only: skip building an autograd graph for the forward pass.
    with torch.no_grad():
        output = model(**encoded_tweet)
    scores = softmax(output[0][0].detach().numpy())
    max_score_idx = int(scores.argmax())
    return labels[max_score_idx]

In [None]:
# Small hand-written sample used to exercise the classifier.
tweets = [
    'I love this!',
    'This is bad.',
    "Meh, it's okay.",
    'Absolutely terrible experience.',
    'What a wonderful day!',
]
# One row per tweet; the predicted label is stored in a second column.
df = pd.DataFrame({'TweetText': tweets})
df['Sentiment'] = df['TweetText'].apply(classify_sentiment)
# Bare expression so the frame is shown as the cell's output.
df

In [None]:
# Count how many tweets received each sentiment label.
sentiment_counts = df['Sentiment'].value_counts()

# Choose each bar's colour from its label rather than its position:
# value_counts() orders by frequency, so a fixed positional colour list would
# attach colours to whichever labels happen to be most common.
sentiment_colors = {'Positive': 'green', 'Neutral': 'blue', 'Negative': 'red'}
bar_colors = [
    sentiment_colors.get(label, 'gray')  # fallback for an unexpected label
    for label in sentiment_counts.index
]

plt.figure(figsize=(6, 4))
plt.bar(sentiment_counts.index, sentiment_counts.values, color=bar_colors)
plt.title('Sentiment Distribution')
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.tight_layout()
plt.show()