In [80]:
import math
from pprint import pprint
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
import seaborn as sns
import praw
import csv
import os
from urllib.error import HTTPError

In [81]:
def get_submissions(subreddit_str):
    
    if not os.path.exists('praw_output'):
        os.mkdir('praw_output')
    
    subreddit = reddit.subreddit(subreddit_str)
    print('Getting submissions and comments from: {}'.format(subreddit.display_name))  
    try:
        title = subreddit.title
        desc = subreddit.description

        if not os.path.exists('subreddits.tsv'):
            with open('subreddits.tsv','w') as f:
                csvwriter = csv.writer(f, delimiter='\t')
                csvwriter.writerow([subreddit.display_name,title,desc])
        else:
            with open('subreddits.tsv','a') as f:
                csvwriter = csv.writer(f, delimiter='\t')
                csvwriter.writerow([subreddit.display_name,title,desc])

        # Write header
        with open(os.path.join('praw_output','{}.tsv'.format(subreddit.display_name)), 'w', newline='\n') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter='\t',
                                    quotechar='|', quoting=csv.QUOTE_MINIMAL)
            csvwriter.writerow(['title','author','date','is_video','id','num_downs','num_ups','upvote_ratio',
                               'num_comments','score','text','subreddit'])

        # Write body
        for submission in subreddit.new(limit=None):
            sub_title = submission.title
            sub_author = submission.author.name if submission.author is not None else -1
            sub_date = submission.created
            sub_is_vid = submission.is_video
            sub_id = submission.id
            sub_downvotes = submission.downs
            sub_upvotes = submission.ups
            sub_upvote_ratio = submission.upvote_ratio
            sub_num_comments = submission.num_comments
            sub_score = submission.score
            sub_text = submission.selftext.strip().replace('\t','').replace('\n','')
            sub_subreddit = submission.subreddit.display_name
            with open(os.path.join('praw_output','{}.tsv'.format(subreddit.display_name)), 'a', newline='\n') as csvfile:
                csvwriter = csv.writer(csvfile, delimiter='\t',
                                    quotechar='|', quoting=csv.QUOTE_MINIMAL)
                csvwriter.writerow([sub_title,sub_author,sub_date,sub_is_vid,sub_id,sub_downvotes,
                                   sub_upvotes,sub_upvote_ratio,sub_num_comments,sub_score,sub_text,sub_subreddit])
    except HTTPError as e:
        if e.code == 403:
            print('Forbidden: private subreddit.')

In [62]:
# Read in climate-related subreddits
SUBREDDITS = pd.read_csv('CLIMATE_SUBREDDITS.txt',sep='\t',header=0)
SUBREDDITS.stance.value_counts()

pro     42
anti    20
neut    10
Name: stance, dtype: int64

In [68]:
len(set(SUBREDDITS['subreddit'])),len(SUBREDDITS['subreddit'])

(72, 72)

In [2]:
# Create reddit instance
reddit = praw.Reddit(client_id='1sbu376RCBiWRw',
                     client_secret='NbqiHMPiKicBXvgfrID-xVNktZM',
                     user_agent='mac:cc_framing:v1 (by /u/emma_cc_research)')

In [69]:
SUBREDDITS_LIST = list(SUBREDDITS['subreddit'])

In [83]:
for i in range(len(SUBREDDITS['subreddit'])-1,len(SUBREDDITS['subreddit'])):
    SUBREDDIT = SUBREDDITS_LIST[i]
    get_submissions(SUBREDDIT)

Getting submissions and comments from: climatestasis
Getting submissions and comments from: ThunbergSyndrome
Getting submissions and comments from: ClimateChangeCancer
Getting submissions and comments from: GlobalWarmingisBunk
Getting submissions and comments from: GWB
Getting submissions and comments from: 350ppm
Getting submissions and comments from: WorldClimate
Getting submissions and comments from: ClimateMobilization
Getting submissions and comments from: Republican
Getting submissions and comments from: republicans
Getting submissions and comments from: askaconservative
Getting submissions and comments from: Conservative
Getting submissions and comments from: conservatives
Getting submissions and comments from: TrueConservativism


NotFound: received 404 HTTP response

In [52]:
pd.read_csv('subreddits.tsv',sep='\t',header=None)

Unnamed: 0,0,1,2
0,redditdev,reddit Development,A subreddit for discussion of reddit API clien...
1,redditdev,reddit Development,A subreddit for discussion of reddit API clien...
2,redditdev,reddit Development,A subreddit for discussion of reddit API clien...


In [54]:
redditdev_df = pd.read_csv('praw_output/redditdev.tsv',sep='\t',header=0)

In [55]:
redditdev_df.columns

Index(['title', 'author', 'date', 'is_video', 'id', 'num_downs', 'num_ups',
       'upvote_ratio', 'num_comments', 'score', 'text', 'subreddit'],
      dtype='object')

In [56]:
redditdev_df.is_video.value_counts()

False    999
Name: is_video, dtype: int64

In [57]:
redditdev_df.subreddit.value_counts()

redditdev    999
Name: subreddit, dtype: int64

In [58]:
redditdev_df.head(15)

Unnamed: 0,title,author,date,is_video,id,num_downs,num_ups,upvote_ratio,num_comments,score,text,subreddit
0,Find old user upvoted post,Snarbly,1594867000.0,False,hrt38p,0,1,1.0,4,1,I'm attempting to access an account's old upvo...,redditdev
1,How to get top all time with a limit?,redditcreate9989,1594854000.0,False,hrp9me,0,11,0.84,5,11,Hello. I'm trying to get the top post from all...,redditdev
2,Inquiry of access and use of the website data,vivi121,1594851000.0,False,hroalq,0,1,1.0,1,1,I am programming in Python. I am curious that ...,redditdev
3,I want to remove all default universal require...,Xecman,1594811000.0,False,hrfue0,0,0,0.5,2,0,"Hi guys, I made a new sub and want even brand ...",redditdev
4,Not sure if this is the Right Sub - Question o...,natesroomrule,1594806000.0,False,hremxm,0,4,0.75,5,4,I am new to all of this. Not a programmer by t...,redditdev
5,Help with configuring AutoMod Minimum Karma Re...,Xecman,1594788000.0,False,hr9kkk,0,2,1.0,2,2,Hi all; this is probably really simple if you ...,redditdev
6,Old coder left and looking to update/remove wi...,bbopki,1594777000.0,False,hr5t1k,0,1,1.0,3,1,I'm not sure if I am in the right place. Our o...,redditdev
7,Unauthorized error on request,BAG0N,1594774000.0,False,hr53ut,0,1,1.0,5,1,I'm trying to request reddit api in python but...,redditdev
8,Are the bots that are still running all using ...,Capybasilisk,1594769000.0,False,hr3g9n,0,17,1.0,10,17,I notice that some sitewide bots are still ope...,redditdev
9,I want to know the body of parent,JeongbinKim,1594757000.0,False,hr088j,0,1,0.67,2,1,I want to know the parent comment of comment.I...,redditdev


In [7]:
# assume you have a Reddit instance bound to variable `reddit`
top_level_comments = list(submission.comments)
all_comments = submission.comments.list()

In [8]:
top_level_comments

[Comment(id='fy1wqz6'), Comment(id='fy4ocqu')]

In [9]:
all_comments

[Comment(id='fy1wqz6'),
 Comment(id='fy4ocqu'),
 Comment(id='fy1wzrq'),
 Comment(id='fy1xzur'),
 Comment(id='fy1yu05')]

In [76]:
test_sub = reddit.subreddit(SUBREDDIT)

In [77]:
pprint(vars(test_sub))

{'_fetched': False,
 '_path': 'r/ClimateSkepticNews/',
 '_reddit': <praw.reddit.Reddit object at 0x1259e8da0>,
 'display_name': 'ClimateSkepticNews'}


In [10]:
pprint(vars(all_comments[0]))

{'_fetched': True,
 '_reddit': <praw.reddit.Reddit object at 0x1259e8da0>,
 '_replies': <praw.models.comment_forest.CommentForest object at 0x125a9de48>,
 '_submission': Submission(id='hr53ut'),
 'all_awardings': [],
 'approved_at_utc': None,
 'approved_by': None,
 'archived': False,
 'associated_award': None,
 'author': Redditor(name='Dan6erbond'),
 'author_flair_background_color': None,
 'author_flair_css_class': 'lib-developer',
 'author_flair_richtext': [],
 'author_flair_template_id': 'e07f7d0a-5057-11e7-90fe-0e342a867c0a',
 'author_flair_text': 'aPRAW Author',
 'author_flair_text_color': 'dark',
 'author_flair_type': 'text',
 'author_fullname': 't2_11qzch',
 'author_patreon_flair': False,
 'author_premium': False,
 'awarders': [],
 'banned_at_utc': None,
 'banned_by': None,
 'body': 'Are you making sure to set the content-type to `urlformencoded`?',
 'body_html': '<div class="md"><p>Are you making sure to set the content-type '
              'to <code>urlformencoded</code>?</p>\n

In [12]:
all_comments[0].author.name

'Dan6erbond'

In [22]:
all_comments[0].subreddit.display_name

'redditdev'

In [23]:
comment_meta = ['.author.name','.body','controversiality','id','permalink','likes','score','ups',
                '.subreddit.display_name']