[![Open In Studio Lab](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/tushar-mahalya/Custom-ChatGPT/blob/master/Sentiment&Emotion_Analysis.ipynb)

## Sentiment Analysis
To get a better idea of the sentiment of our Reddit comments, we will use a pre-trained [RoBERTa](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest)-base model, trained on ~124M tweets from January 2018 to December 2021 and fine-tuned for sentiment analysis.

In [1]:
import logging

import matplotlib.pyplot as plt
import pandas as pd
import transformers as trf

# set logging level to ERROR or higher to ignore warnings from Hugging Face Models
trf.logging.set_verbosity_error()

ModuleNotFoundError: No module named 'pandas'

In [2]:
# Read the posts table and the comments table from their CSV exports
posts_csv_path = 'data/Top_Posts.csv'
comments_csv_path = 'data/Top_Posts_Comments.csv'

posts_df = pd.read_csv(posts_csv_path)
comments_df = pd.read_csv(comments_csv_path)

In [3]:
# Join every comment onto its post via the shared post ID.
# A left join keeps posts without comments; those rows get a missing 'comment'.
merged_with_gaps = pd.merge(posts_df, comments_df, on='post_id', how='left')

# Drop the rows that don't contain any comment
has_comment = merged_with_gaps['comment'].notna()
comments_posts_merged = merged_with_gaps[has_comment]

In [4]:
# Inspect which columns are available on the merged posts/comments frame
comments_posts_merged.columns

Index(['post_id', 'post_title', 'subreddit', 'post_url', 'flair_text', 'score',
       'comments', 'upvote_ratio', 'date-time', 'year', 'comment'],
      dtype='object')

In [1]:
# Build the sentiment-classification pipeline once so later cells can reuse it.
# Only the model checkpoint is named; no task argument is passed explicitly.
sentiment_model_name = "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned"
sentiment_classifier = trf.pipeline(model=sentiment_model_name)

NameError: name 'trf' is not defined

In [6]:
# Smoke-test the sentiment classifier with one sentence per expected label
sample_sentences = (
    'I love Data Science !',   # Positive Sentence
    'I hate Data Science !',   # Negative Sentence
    'Some aspect of Data Science I like but some I dislike.',   # Neutral Sentence
)
for sample_sentence in sample_sentences:
    print(sentiment_classifier(sample_sentence))

[{'label': 'positive', 'score': 0.9863550066947937}]
[{'label': 'negative', 'score': 0.93429034948349}]
[{'label': 'neutral', 'score': 0.512378990650177}]


In [7]:
def get_comments(word_input: str, *, case: bool = True, regex: bool = True):
    """Return the merged post/comment rows whose post title contains ``word_input``.

    Args:
        word_input: Text (or pattern, when ``regex`` is true) to look for in
            the ``post_title`` column of ``comments_posts_merged``.
        case: Match case-sensitively. Defaults to ``True``, which is what
            ``str.contains`` does when the option is not given.
        regex: Interpret ``word_input`` as a regular expression. Defaults to
            ``True`` for the same reason; pass ``False`` to search for text
            containing characters such as ``+`` or ``(`` literally.

    Returns:
        An independent copy of the matching rows, so callers can add columns
        without touching (or getting chained-assignment warnings about)
        ``comments_posts_merged``.
    """
    # na=False: a missing title counts as "no match" instead of putting NaN
    # into the mask, which cannot be used for boolean indexing.
    title_matches = comments_posts_merged['post_title'].str.contains(
        word_input, case=case, regex=regex, na=False
    )

    return comments_posts_merged[title_matches].copy()

def get_sentiment(text: str) -> str:
    """Return the sentiment label that ``sentiment_classifier`` assigns to ``text``.

    Args:
        text: The text to classify.

    Returns:
        The ``'label'`` of the first prediction, or ``'Not Classified'`` when
        the classifier call fails for any ordinary reason.
    """
    try:
        return sentiment_classifier(text)[0]['label']
    except Exception:
        # Best-effort: a failure on one text must not abort a whole batch.
        # Catching Exception (not a bare except) still lets KeyboardInterrupt
        # and SystemExit stop a long-running run.
        return 'Not Classified'

def calculate_sentiment(text: str):
    """Classify every comment on posts matching ``text`` and tally the labels.

    Args:
        text: Search text forwarded to ``get_comments`` to select the posts.

    Returns:
        A tuple ``(intext_df, intext_dict)``: the matching rows with an added
        ``'sentiment'`` column, and a dict mapping each sentiment label to the
        number of comments that received it.
    """
    # Work on an explicit copy so adding the column never writes into (or
    # warns about writing into) a slice of the shared merged frame.
    intext_df = get_comments(text).copy()
    # astype(str) guards against non-string comment values before classifying
    intext_df['sentiment'] = intext_df['comment'].astype(str).apply(get_sentiment)
    intext_dict = intext_df['sentiment'].value_counts().to_dict()
    return intext_df, intext_dict

In [None]:
# Run the sentiment tally for posts matching 'data science'; the returned frame
# is discarded and only the label -> count dict is kept in `dum`
_, dum = calculate_sentiment('data science')

In [None]:
# Donut chart of the sentiment counts gathered for 'data science'.
# Materialise the dict views as lists so pie() receives plain sequences.
sentiment_labels = list(dum.keys())
sentiment_counts = list(dum.values())

fig1, ax1 = plt.subplots()
ax1.pie(sentiment_counts, labels=sentiment_labels, autopct='%1.1f%%',
        shadow=True, startangle=90)
# draw a white circle over the centre to turn the pie into a donut
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
ax1.add_artist(centre_circle)
# Equal aspect ratio ensures that pie is drawn as a circle
ax1.axis('equal')
# Set the title before tight_layout() so the layout pass accounts for it
ax1.set_title('Sentiment around \'data science\'')
plt.tight_layout()
plt.show()

In [None]:
# Donut chart of the same sentiment counts with custom colours and
# slightly separated wedges.
# Materialise the dict views as lists so pie() receives plain sequences.
sentiment_labels = list(dum.keys())
sentiment_counts = list(dum.values())

# colors
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']
# explosion: one offset per wedge, sized to the data instead of fixed at four,
# because pie() requires explode to have the same length as the values
explode = (0.05,) * len(sentiment_counts)

plt.pie(sentiment_counts, colors=colors, labels=sentiment_labels, autopct='%1.1f%%',
        startangle=90, pctdistance=0.85, explode=explode)
# draw a white circle over the centre to turn the pie into a donut
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
fig = plt.gcf()
ax = fig.gca()
ax.add_artist(centre_circle)
# Equal aspect ratio ensures that pie is drawn as a circle; apply it to the
# axes of THIS figure rather than to the axes of the previous cell
ax.axis('equal')
plt.tight_layout()
plt.show()

In [None]:
# Show which sentiment labels appear as keys of the count dict
dum.keys()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# create a sample data frame (the same sentiment label occurs on several rows)
comments_posts_df_sub = pd.DataFrame({
    'sentiment': ['positive', 'positive', 'neutral', 'positive', 'negative', 'neutral', 'neutral', 'neutral'],
    'count': [10, 15, 5, 12, 8, 3, 6, 2]
})

# Sum the counts per sentiment so each label gets exactly one wedge instead of
# several wedges carrying the same label; sort=False keeps first-seen order.
sentiment_totals = comments_posts_df_sub.groupby('sentiment', sort=False)['count'].sum()

# plot a pie chart
fig, ax = plt.subplots(figsize=(8, 8))
wedges, texts, autotexts = ax.pie(sentiment_totals.to_numpy(), labels=sentiment_totals.index.tolist(),
                                  autopct='%1.1f%%', startangle=90, counterclock=False)

# format the plot
ax.set_title('Sentiment around the topic')
ax.axis('equal')

plt.show()


## Emotion Recognition
We'll use the [DistilBERT](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)-uncased model for emotion recognition, which is only 60% of the size of the original BERT model while retaining 97% of its language understanding. It is trained on a similar set of ~124M tweets from January 2018 to December 2021 and fine-tuned for emotion recognition of input text.

In [None]:
# Build the emotion-classification pipeline once so later cells can reuse it.
# Only the model checkpoint is named; no task argument is passed explicitly.
emotion_model_name = 'bhadresh-savani/distilbert-base-uncased-emotion'
emotion_classifier = trf.pipeline(model=emotion_model_name)