<h2>Sentiment Analysis</h2>

In [5]:
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from nltk.sentiment import SentimentIntensityAnalyzer
from plotly.subplots import make_subplots

In [2]:
from transformers import pipeline



In [35]:
import torch

In [13]:
# Load the dream texts from the semi-raw CSV; its first column becomes the index.
df = pd.read_csv(
    "../data/raw data/semi_raw.csv",
    index_col=0,
)

In [3]:
# Text-classification pipeline backed by an emotion checkpoint.
# `return_all_scores=True` makes each prediction carry a score for every label
# (sadness, joy, love, anger, fear, surprise) rather than only the top one.
# NOTE(review): newer transformers releases appear to deprecate
# `return_all_scores` in favour of `top_k=None`; that may reorder the scores,
# so confirm against the installed version before switching.
classifier = pipeline(
    "text-classification",
    model="bhadresh-savani/distilbert-base-uncased-emotion",
    return_all_scores=True,
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/768 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



[[{'label': 'sadness', 'score': 0.0006792694330215454}, {'label': 'joy', 'score': 0.9959298968315125}, {'label': 'love', 'score': 0.0009452453814446926}, {'label': 'anger', 'score': 0.0018055177060887218}, {'label': 'fear', 'score': 0.00041110304300673306}, {'label': 'surprise', 'score': 0.00022885673388373107}]]


In [26]:
# Classify one dream text and keep the raw result visible in the cell output.
prediction = classifier(df.text[5])
print(prediction)

# The pipeline wraps the per-label dicts in an outer list; unpack the first
# (only) entry into parallel label / score lists for plotting.
label_scores = prediction[0]
emotion = [entry['label'] for entry in label_scores]
score = [entry['score'] for entry in label_scores]

[[{'label': 'sadness', 'score': 0.009748699143528938}, {'label': 'joy', 'score': 0.7268431782722473}, {'label': 'love', 'score': 0.0012346131261438131}, {'label': 'anger', 'score': 0.021945590153336525}, {'label': 'fear', 'score': 0.22697383165359497}, {'label': 'surprise', 'score': 0.013254121877253056}]]


In [18]:
# Show the raw text that was classified above.
df["text"][5]

'For context: I don’t dream often but when I do I always wake up thinking that was a little too real. Nothing scary just realistic yet odd situations that could happen in reality. In these dreams I sometimes I have people in them who are only silhouettes. I’ve learnt through my experiences that if I haven’t met this person (yet) they’ll show up in my dreams as silhouettes. Like unlockable characters in a game. Sometimes the scene would be places I’ve never been before until the Deja vu strikes. \n\nI experienced these dreams in reality last week on three separate occasions. The same silhouettes now have faces. Things that happened in dreams came to. This is the first time I’ve had deja vu in ages but before last week I’d have Deja vu once in a blue moon. \n\nWhat’s happening to me?'

In [30]:
# Bar chart of the per-emotion scores held in `emotion` / `score`.
# A single-panel figure needs no subplot grid, so build it directly.
fig = go.Figure(
    data=[
        go.Bar(
            x=emotion,
            y=score,
            # The scores plotted here come from the prediction for df.text[5],
            # so label the trace with that dream rather than a constant 1.
            name="Dream 5",
        )
    ]
)

# `update_layout` returns the figure, so leaving it as the last expression
# also renders the chart in the notebook.
fig.update_layout(
    title="Sentiment Classification Results",
    xaxis_title="Criteria",
    yaxis_title="Sentiment Scores",
    legend_title="Dreams",
    # Plotly hides the legend for a single trace unless asked to show it.
    showlegend=True,
)

In [5]:
# VADER loads its lexicon from the NLTK data store. On an environment where
# the resource has never been downloaded the constructor raises LookupError,
# so fetch it once when it is missing to keep Restart & Run All working.
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon")

sentiment = SentimentIntensityAnalyzer()

In [8]:
# Score every entry of df.complete with VADER: one result dict per text.
vader_scores = [sentiment.polarity_scores(text) for text in df.complete]

# Split the per-text dicts into one list per score type.
negative = [entry['neg'] for entry in vader_scores]
positive = [entry['pos'] for entry in vader_scores]
neutral = [entry['neu'] for entry in vader_scores]
compound = [entry['compound'] for entry in vader_scores]

# Assemble the score table; rows follow the iteration order of df.complete and
# get a fresh 0..n-1 index.
final = pd.DataFrame({'neg': negative, 'pos': positive, 'neu': neutral, 'cmp': compound})

In [9]:
# Leave the frame as the cell's last expression so the notebook renders it with
# its rich (truncated) table display instead of a plain-text print.
final

       neg    pos    neu     cmp
0    0.046  0.282  0.673  0.9612
1    0.208  0.140  0.652 -0.4102
2    0.092  0.263  0.646  0.9275
3    0.152  0.173  0.674  0.1027
4    0.030  0.212  0.758  0.8731
..     ...    ...    ...     ...
850  0.000  0.000  1.000  0.0000
851  0.150  0.150  0.700  0.2263
852  0.172  0.118  0.711 -0.2960
853  0.050  0.167  0.783  0.9083
854  0.126  0.347  0.528  0.9260

[855 rows x 4 columns]


In [6]:
# Sanity check of the analyzer on a single word; the result is a dict with
# 'neg', 'neu', 'pos' and 'compound' entries.
sentiment.polarity_scores("hello")

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

Choose a dream

In [31]:
# Row positions of the two dreams to inspect and compare in the cells below.
ind = 20
ind2 = 30

# `final` was built in the iteration order of df.complete and carries a fresh
# 0..n-1 index, so address both frames by position to keep a text and its
# scores in step even if df's own index labels are not 0..n-1.
print(df.complete.iloc[ind])
# Use `ind` (not a hard-coded 20) so changing the selection updates both lines.
print(final.iloc[ind, :].tolist())

dream twice vividly gallery kind buying artwork never done anything like waking life grew really frugal shopped local goodwill also art collector waking life dream always buying large painting blowing lot money richest girl world like specific meaning 
[0.09, 0.23, 0.681, 0.7636]


Comparative Sentiment

In [32]:
# Map each column of `final` to the label shown on the x-axis. Selecting the
# columns by name keeps every bar under its own label: the frame's column
# order is neg, pos, neu, cmp, which differs from the display order used here,
# so pairing the labels with a raw positional row mislabels pos and neg.
score_labels = {"pos": "Positive", "neg": "Negative", "neu": "Neutral", "cmp": "Compound"}

fig = make_subplots(rows=1, cols=2)

# One bar trace per selected dream, placed side by side in a 1x2 grid.
for subplot_col, dream_idx in enumerate((ind, ind2), start=1):
    fig.add_trace(
        go.Bar(
            x=list(score_labels.values()),
            y=final.iloc[dream_idx][list(score_labels)].tolist(),
            name=f"Dream {dream_idx}",
        ),
        row=1,
        col=subplot_col,
    )

# `update_layout` returns the figure, so leaving it as the last expression
# also renders the chart in the notebook.
fig.update_layout(
    title="Side by Side Barplot for Sentiment",
    xaxis_title="Criteria",
    yaxis_title="Sentiment Scores",
    legend_title="Dreams",
)

Overall Histogram of Sentiments

In [10]:
# Distribution of the positive-sentiment score across all dreams.
# The `labels` key must match the plotted column ("pos"); a key for a column
# that is not plotted is silently ignored and the axis keeps the raw name.
# NOTE(review): the previous label text referred to the compound score; if
# that was the intended column, switch both `x` and the label key to "cmp".
fig = px.histogram(
    final,
    x="pos",
    labels={"pos": "Positive Sentiment"},
)
# The histogram's count axis is not driven by `labels`, so title it explicitly.
fig.update_layout(yaxis_title="Count of Dreams")
fig.show()