In [1]:
# First, let's load the Reddit API creds.
%load_ext dotenv
%dotenv ../ingest/praw_creds.env
%dotenv ../.env

import os

# Reddit API credentials come from the environment (populated by the dotenv
# files loaded above). Each is None when its variable is not set.
REDDIT_CLIENT_ID = os.environ.get("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET")

# Subreddit whose comments this notebook samples.
SUBREDDIT = "politics"

# Version tag used in the user agent, suffixed to mark traffic from this EDA
# notebook. Fall back to a placeholder when VERSION is unset: `None + "-eda"`
# would otherwise fail with an unhelpful TypeError.
VERSION = os.environ.get("VERSION", "dev") + "-eda"

In [2]:
import asyncpraw

"""
Set up the Reddit client instance.
I'm using a read-only PRAW instance because I have no need to post comments.
I'm using async PRAW pretty much just because `ingest` does so.

Credentials need to be supplied via env var.
"""
# Descriptive user agent string, tagged with this notebook's version.
user_agent = f"python:vivshaw/politeiamancer:{VERSION} (by /u/vivshaw)"

# Only app credentials are supplied (no username/password).
reddit = asyncpraw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=user_agent,
)

In [3]:
"""
Let's load some comments!
"""
subreddit = await reddit.subreddit(SUBREDDIT)

comments = []

async for comment in subreddit.comments(limit=100):
    comment_as_dict = {
        # ID
        "fullname": comment.name,
        # Comment details
        "author": comment.author,
        "body": comment.body,
        "permalink": comment.permalink,
        # Time
        "created_utc": int(comment.created_utc),
    }
    comments.append(comment_as_dict)

In [4]:
import pandas as pd

"""
Now that we've loaded them, we need to get 'em into a Pandas dataframe.
"""

# Each collected comment dict becomes one row; its keys become the columns.
df = pd.DataFrame(data=comments)
df.head(n=5)

Unnamed: 0,fullname,author,body,permalink,created_utc
0,t1_ktkstc0,VTinstaMom,Never going to happen to an American ally.\n\n...,/r/politics/comments/1b7pvd0/nikki_haley_wins_...,1709708718
1,t1_ktksskc,Zimmonda,"""All-in"" would be a fucking carrier strike gro...",/r/politics/comments/1b7k202/the_biden_adminis...,1709708704
2,t1_ktkssiu,offline4good,Halley is republican,/r/politics/comments/1b7pigp/haley_defeats_tru...,1709708703
3,t1_ktkssdx,ClusterFoxtrot,"I don't know, a lot of primary interviewees sa...",/r/politics/comments/1b7s39r/donald_trump_wins...,1709708701
4,t1_ktkss2d,SoggyBoysenberry7703,"Is this truly acceptable to do though? Like, l...",/r/politics/comments/1b7mt3g/house_backs_bill_...,1709708695


In [12]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

"""
Time for some sentiment analysis!
"""


# Build the analyzer once and reuse it: constructing one per comment repeats
# the lexicon setup for every row without changing the scores.
_SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()


def sentiment_score(text: str) -> dict[str, float]:
    """
    Calculate sentiment scores for a piece of text using VADER.

    Args:
        text: The text to score.

    Returns:
        The mapping produced by `polarity_scores`. In this notebook it is
        expanded into the `neg`, `neu`, `pos` and `compound` columns.
    """
    return _SENTIMENT_ANALYZER.polarity_scores(text)


# Expand each comment's score dict into one column per score.
ratings_df = df["body"].apply(sentiment_score).apply(pd.Series)

# Drop any score columns left over from a previous run of this cell before
# attaching the fresh ones. A plain concat would duplicate the columns on
# re-run, and lookups like df["neg"] would then return a frame, not a series.
df = pd.concat(
    [df.drop(columns=ratings_df.columns, errors="ignore"), ratings_df],
    axis=1,
)

Unnamed: 0,neg,neu,pos,compound
count,100.0,100.0,100.0,100.0
mean,0.11168,0.78349,0.10486,-0.037621
std,0.159694,0.190293,0.131168,0.478529
min,0.0,0.0,0.0,-0.9094
25%,0.0,0.6925,0.0,-0.411525
50%,0.083,0.795,0.0615,0.0
75%,0.1785,0.942,0.167,0.32895
max,1.0,1.0,0.661,0.962


In [13]:
"""
Let's see some summary stats.
"""

# Summary statistics for the four VADER score columns.
df.loc[:, ["neg", "neu", "pos", "compound"]].describe()

Unnamed: 0,neg,neu,pos,compound
count,100.0,100.0,100.0,100.0
mean,0.11168,0.78349,0.10486,-0.037621
std,0.159694,0.190293,0.131168,0.478529
min,0.0,0.0,0.0,-0.9094
25%,0.0,0.6925,0.0,-0.411525
50%,0.083,0.795,0.0615,0.0
75%,0.1785,0.942,0.167,0.32895
max,1.0,1.0,0.661,0.962


In [8]:
"""
How 'bout looking at our most-negative, most-neutral, and most-positive comment?
"""

# Pick the row with the highest value of each raw VADER component.
most_negative = df.loc[df["neg"].idxmax()]
most_neutral = df.loc[df["neu"].idxmax()]
most_positive = df.loc[df["pos"].idxmax()]

# Print each winner under its heading, followed by a blank-line separator.
for heading, row in (
    ("Most negative comment:", most_negative),
    ("Most neutral comment:", most_neutral),
    ("Most positive comment:", most_positive),
):
    print(heading)
    print(row["body"])
    print("\n")

"""
OK, how about by compound score?
"""
# The extremes of the compound score: lowest and highest rows.
compound_most_negative = df.loc[df["compound"].idxmin()]
compound_most_positive = df.loc[df["compound"].idxmax()]

for heading, row in (
    ("Most negative compound score comment:", compound_most_negative),
    ("Most positive compound score comment:", compound_most_positive),
):
    print(heading)
    print(row["body"])
    print("\n")

# Alright, seems like compound scores are the way to go. The raw scores are not that informative.

Most negative comment:
No.


Most neutral comment:
Halley is republican


Most positive comment:
Super Tuesday Sweep 


Most negative compound score comment:
Obama got substantially more undecided opposition. This isn't a real issue.

If the opposing candidate offered anything to these voters, that's one thing. But the opposing candidate is setting precious things on fire in front of the voters, such that apathy doesn't really set in, in the same way.

If the Republican candidate was a normal person, this would be a different calculus. 

Despite the propaganda and media screaming "lefties drop out!" the omnipresence of Christian fascists and dumb grifters forces everyone toward the opposite of that, which is Biden.

Trump is sabotaging himself, and Biden is the incumbent.


Most positive compound score comment:
Since they're using the bible so much to justify this: We can and hopefully will, be able to use their own book against them. The bible's few mentions abortion is when and how t

In [9]:
from nrclex import NRCLex

"""
Next up, we'll analyze some emotional valence.
"""


def emotion_scores(text):
    """
    Score a piece of text for each tracked emotion using NRCLex.

    Returns a dict keyed by emotion name (anger, disgust, fear, joy, sadness,
    surprise, trust). Each value is the frequency NRCLex reports for that
    emotion, or 0 when NRCLex reports none for it.
    """
    tracked_emotions = (
        "anger",
        "disgust",
        "fear",
        "joy",
        "sadness",
        "surprise",
        "trust",
    )

    # NOTE(review): assumes `affect_frequencies` is a plain dict — confirm.
    frequencies = NRCLex(text).affect_frequencies

    # Anything NRCLex reports outside the tracked set is ignored.
    return {emotion: frequencies.get(emotion, 0) for emotion in tracked_emotions}


# Expand each comment's emotion dict into one column per emotion.
emotions_df = df["body"].apply(emotion_scores).apply(pd.Series)

# Drop any emotion columns left over from a previous run of this cell before
# attaching the fresh ones. A plain concat would duplicate the columns on
# re-run, and lookups like df["anger"] would then return a frame, not a series.
df = pd.concat(
    [df.drop(columns=emotions_df.columns, errors="ignore"), emotions_df],
    axis=1,
)

Unnamed: 0,fullname,author,body,permalink,created_utc,neg,neu,pos,compound,anger,anticip,disgust,fear,joy,negative,positive,sadness,surprise,trust
0,t1_ktkstc0,VTinstaMom,Never going to happen to an American ally.\n\n...,/r/politics/comments/1b7pvd0/nikki_haley_wins_...,1709708718,0.156,0.844,0.0,-0.34,0.142857,0.0,0.0,0.142857,0.0,0.142857,0.142857,0.142857,0.0,0.142857
1,t1_ktksskc,Zimmonda,"""All-in"" would be a fucking carrier strike gro...",/r/politics/comments/1b7k202/the_biden_adminis...,1709708704,0.272,0.728,0.0,-0.8214,0.222222,0.0,0.111111,0.222222,0.0,0.222222,0.0,0.111111,0.0,0.0
2,t1_ktkssiu,offline4good,Halley is republican,/r/politics/comments/1b7pigp/haley_defeats_tru...,1709708703,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,t1_ktkssdx,ClusterFoxtrot,"I don't know, a lot of primary interviewees sa...",/r/politics/comments/1b7s39r/donald_trump_wins...,1709708701,0.052,0.805,0.143,0.5423,0.111111,0.0,0.0,0.0,0.111111,0.111111,0.222222,0.111111,0.111111,0.111111
4,t1_ktkss2d,SoggyBoysenberry7703,"Is this truly acceptable to do though? Like, l...",/r/politics/comments/1b7mt3g/house_backs_bill_...,1709708695,0.0,0.614,0.386,0.6007,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [17]:
"""
Let's see some summary stats.
"""

# Summary statistics for the tracked emotion columns.
emotion_columns = ["anger", "disgust", "fear", "joy", "sadness", "surprise", "trust"]
df[emotion_columns].describe()

Unnamed: 0,anger,anticip,disgust,fear,joy,sadness,surprise,trust
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.06232,0.0,0.027664,0.062342,0.040697,0.061498,0.033497,0.074562
std,0.092858,0.0,0.05893,0.123815,0.061141,0.097299,0.05929,0.126041
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.119485,0.0,0.0,0.1,0.083333,0.111111,0.064286,0.125
max,0.5,0.0,0.25,1.0,0.25,0.5,0.25,1.0


In [20]:
"""
OK, most emotional comments?
"""
# For each emotion, pick the row with the highest score.
most_angry = df.loc[df["anger"].idxmax()]
most_disgusted = df.loc[df["disgust"].idxmax()]
most_fearful = df.loc[df["fear"].idxmax()]
most_joyful = df.loc[df["joy"].idxmax()]
most_sad = df.loc[df["sadness"].idxmax()]
most_surprised = df.loc[df["surprise"].idxmax()]
most_trustful = df.loc[df["trust"].idxmax()]

# Print each winner's text under its heading.
for heading, row in (
    ("Most angry comment:", most_angry),
    ("Most disgusted comment:", most_disgusted),
    ("Most fearful comment:", most_fearful),
    ("Most joyful comment:", most_joyful),
    ("Most sad comment:", most_sad),
    ("Most surprised comment:", most_surprised),
    ("Most trustful comment:", most_trustful),
):
    print(heading)
    print(row["body"])


# Alright, emotional analysis is not amazingly accurate. But it's worth poking at.

Most angry comment:
Biden is going to have to deny Trumps election results because of voter fraud.  Just like Trump tried to do on Jan 6th.
Most anticipatory comment:
Never going to happen to an American ally.

That's literally pants on head crazy.
Most disgusted comment:
Bloomberg won that primary in 2020.

Nobody gives a shit
Most fearful comment:
"i dont like the verdict, please give me another trial."
Most joyful comment:
Q.com is more fitting
Most sad comment:
He’s been loosing he lost me
Most surprised comment:
I like how the goalposts keep shifting. Now a ceasefire isn't even serious. And you really expect people to take you seriously?
Most trustful comment:
I’m a millennial so it’s not an issue for me. For a boomer which is what makes up most of the voter base in the district? I think they might.


In [21]:
"""
Let's grab the comments mentioning Trump.
"""
# Lower-case the comment text once so keyword matching ignores case.
bodies = df["body"].str.lower()

# Boolean mask: True where the lower-cased body contains the keyword.
trump_filter = bodies.str.contains("trump")
trump_df = df[trump_filter]
trump_df.head(n=5)

Unnamed: 0,fullname,author,body,permalink,created_utc,anger,anticip,disgust,fear,joy,negative,positive,sadness,surprise,trust,neg,neu,pos,compound
3,t1_ktkssdx,ClusterFoxtrot,"I don't know, a lot of primary interviewees sa...",/r/politics/comments/1b7s39r/donald_trump_wins...,1709708701,0.111111,0.0,0.0,0.0,0.111111,0.111111,0.222222,0.111111,0.111111,0.111111,0.052,0.805,0.143,0.5423
6,t1_ktksrzr,not_enough_characte,There's probably a lot of Trump voters that wo...,/r/politics/comments/1b75ytt/discussion_thread...,1709708694,0.125,0.0,0.0,0.0,0.125,0.125,0.125,0.125,0.125,0.125,0.0,1.0,0.0,0.0
7,t1_ktksrxt,ratherbealurker,You are completely wrong on Jan 6. How did he ...,/r/politics/comments/1b75ytt/discussion_thread...,1709708693,0.142857,0.0,0.095238,0.095238,0.047619,0.190476,0.142857,0.095238,0.0,0.095238,0.141,0.739,0.12,-0.5931
18,t1_ktksq0f,CustomAlpha,Trump tried to deny voters of their right to h...,/r/politics/comments/1b79xnk/trump_knocks_demo...,1709708657,0.166667,0.0,0.0,0.0,0.083333,0.166667,0.166667,0.083333,0.083333,0.166667,0.088,0.912,0.0,-0.34
26,t1_ktksnp9,galaxysword2,He probably wants Trump to win in reality.,/r/politics/comments/1b7rlnf/uncommitted_prote...,1709708616,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.648,0.352,0.5859


In [37]:
# Columns to summarise and the statistics to compute for each of them.
summary_columns = [
    "compound",
    "anger",
    "disgust",
    "fear",
    "joy",
    "sadness",
    "surprise",
    "trust",
]
summary_stats = ["count", "min", "max", "mean", "median", "skew", "std"]

trump_df[summary_columns].agg(summary_stats)

Unnamed: 0,compound,anger,disgust,fear,joy,sadness,surprise,trust
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
min,-0.9094,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.9517,0.5,0.117647,0.333333,0.125,0.25,0.24,0.166667
mean,-0.03586,0.147807,0.027697,0.075048,0.055264,0.098822,0.056442,0.06762
median,-0.23565,0.125,0.0,0.02,0.054412,0.089286,0.021739,0.078788
skew,0.151992,1.432738,1.135812,1.334379,0.064119,0.400629,1.296814,0.210158
std,0.618429,0.119072,0.042158,0.100833,0.04792,0.080314,0.072694,0.064041


In [27]:
"""
Now, those referring to Biden.
"""

# Boolean mask: True where the lower-cased body contains the keyword.
biden_filter = bodies.str.contains("biden")
biden_df = df[biden_filter]
biden_df.head(n=5)

Unnamed: 0,fullname,author,body,permalink,created_utc,anger,anticip,disgust,fear,joy,negative,positive,sadness,surprise,trust,neg,neu,pos,compound
3,t1_ktkssdx,ClusterFoxtrot,"I don't know, a lot of primary interviewees sa...",/r/politics/comments/1b7s39r/donald_trump_wins...,1709708701,0.111111,0.0,0.0,0.0,0.111111,0.111111,0.222222,0.111111,0.111111,0.111111,0.052,0.805,0.143,0.5423
12,t1_ktksqsn,Disastrous_Bad3084,>Biden can push against Israel on things. It d...,/r/politics/comments/1b62u5i/harris_escalates_...,1709708672,0.09375,0.0,0.03125,0.0625,0.03125,0.21875,0.28125,0.0625,0.0625,0.09375,0.126,0.797,0.077,-0.8695
17,t1_ktksq66,StIdes-and-a-swisher,Also the Taylor swift x NFl x cIA x FBI x demo...,/r/politics/comments/1b7pigp/haley_defeats_tru...,1709708661,0.0,0.0,0.0,0.1,0.0,0.2,0.2,0.2,0.1,0.1,0.0,0.808,0.192,0.4215
18,t1_ktksq0f,CustomAlpha,Trump tried to deny voters of their right to h...,/r/politics/comments/1b79xnk/trump_knocks_demo...,1709708657,0.166667,0.0,0.0,0.0,0.083333,0.166667,0.166667,0.083333,0.083333,0.166667,0.088,0.912,0.0,-0.34
20,t1_ktksowg,_far-seeker_,>Biden lost American Samoa today. \n\nThat was...,/r/politics/comments/1b7pigp/haley_defeats_tru...,1709708638,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.333333,0.0,0.0,0.204,0.796,0.0,-0.3182


In [38]:
# Columns to summarise and the statistics to compute for each of them.
summary_columns = [
    "compound",
    "anger",
    "disgust",
    "fear",
    "joy",
    "sadness",
    "surprise",
    "trust",
]
summary_stats = ["count", "min", "max", "mean", "median", "skew", "std"]

biden_df[summary_columns].agg(summary_stats)

Unnamed: 0,compound,anger,disgust,fear,joy,sadness,surprise,trust
count,18.0,18.0,18.0,18.0,18.0,18.0,18.0,18.0
min,-0.9094,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.7249,0.5,0.133333,0.25,0.111111,0.333333,0.133333,0.3
mean,-0.1985,0.122294,0.023929,0.060542,0.05034,0.119684,0.056379,0.082427
median,-0.3182,0.09233,0.0,0.062996,0.058333,0.091667,0.062996,0.096875
skew,0.292662,1.828542,1.724336,1.204938,-0.120774,0.679366,-0.099345,1.069384
std,0.584338,0.122483,0.041735,0.068874,0.041615,0.095389,0.046011,0.078468


In [41]:
# What do the emotional deltas look like?
# Positive values mean the Biden subset averages higher than the Trump subset.
delta_columns = [
    "compound",
    "anger",
    "disgust",
    "fear",
    "joy",
    "sadness",
    "surprise",
    "trust",
]
biden_means = biden_df[delta_columns].mean()
trump_means = trump_df[delta_columns].mean()

biden_means - trump_means

compound   -0.162640
anger      -0.025513
disgust    -0.003768
fear       -0.014506
joy        -0.004924
sadness     0.020861
surprise   -0.000062
trust       0.014807
dtype: float64