# PRAW Library
## Python Reddit API Wrapper

In [None]:
# install library
# !pip install praw

In [None]:
import praw
import pandas as pd
import networkx as nx
#import matplotlib.pyplot as plt

In [None]:
import os

# Credentials for the Reddit app and account used by this notebook.
# Each value is read from an environment variable first, so real secrets can be
# supplied without editing the notebook; the literals are fallbacks that keep
# the previous behaviour when the variable is not set.
# NOTE(review): the fallback client id/secret are committed in plain text --
# treat them as compromised, rotate them, and then remove them from this file.
clientid = os.environ.get("REDDIT_CLIENT_ID", "EppA5GzbdocKzA")
clientsecret = os.environ.get("REDDIT_CLIENT_SECRET", "PBMQFMsqWuXuqGak1qVNlhzyi4A")
password = os.environ.get("REDDIT_PASSWORD", 'password')
username = os.environ.get("REDDIT_USERNAME", "Aidenberg")
user_agent = os.environ.get("REDDIT_USER_AGENT", "testcript by /u/aiden")

In [None]:
# Create the API client from the credential variables defined in the previous
# cell. `user_agent` and `username` are passed by name instead of being typed
# again as literals, so they only need to be changed in one place.
reddit = praw.Reddit(
    client_id=clientid,
    client_secret=clientsecret,
    password=password,
    user_agent=user_agent,
    username=username,
)

In [None]:
# Quick check of the client: ask the API for the current user.
# NOTE(review): presumably this shows the account named in `username` -- confirm.
reddit.user.me()

### Play with PRAW

In [None]:
# Print the title of each of the first five "hot" submissions of r/python.
subreddit = reddit.subreddit('python')
hot_python = subreddit.hot(limit=5)
for post in hot_python:
    title = post.title
    print(title)

In [None]:
# Print more fields per submission: title, `ups`, `downs` and `visited`.
hot_python = subreddit.hot(limit=5)
for post in hot_python:
    fields = (post.title, post.ups, post.downs, post.visited)
    print(*fields)

In [None]:
# What else can we do with PRAW? For example, act on the subreddit itself:
# subscribe, then immediately unsubscribe again.
subreddit.subscribe()
subreddit.unsubscribe()

In [None]:
# Comments: print the body of every top-level comment of the five hot
# submissions, separated by a dashed line.
hot_python = subreddit.hot(limit=5)
for submission in hot_python:
    # Long threads contain "load more comments" placeholders that have no
    # `body`; remove them first so the loop below cannot fail with an
    # AttributeError (same call as in the later 'news' cell).
    submission.comments.replace_more(limit=0)
    comments = submission.comments
    for comment in comments:
        print(20*'-')
        print(comment.body)

In [None]:
# Comments with thread information: for every top-level comment print its
# parent, its own id and its body.
# Original note: an error occurs if the thread is too long.
# The note also suggests retrying with the 'news' subreddit; this cell itself
# queries 'python'.
hot_python = reddit.subreddit('python').hot(limit=5)
for submission in hot_python:
    comments = submission.comments
    for comment in comments:
        print('-' * 20)
        print(f'Parent ID: {comment.parent()}')
        print(f'Comment ID: {comment.id}')
        print(comment.body)

In [None]:
# Same printout for the first hot submission of r/news, but over the whole
# comment tree: replace_more(limit=0) is called first and list() is used to
# walk every comment rather than only the top level.
hot_python = reddit.subreddit('news').hot(limit=1)
for submission in hot_python:
    forest = submission.comments
    forest.replace_more(limit=0)
    for comment in forest.list():
        print('-' * 20)
        print(f'Parent ID: {comment.parent()}')
        print(f'Comment ID: {comment.id}')
        print(comment.body)

## Comment Network

In [None]:
# Build the edge list of the comment network for the first hot r/news
# submission: one row per comment, linking the author of the parent to the
# author of the comment. (The column names say "ID" but the values are the
# `author` attributes.)
col_edge = ['parentID', 'commentID']
lst = []
hot_python = reddit.subreddit('news').hot(limit=1)
for submission in hot_python:
    submission.comments.replace_more(limit=0)
    for comment in submission.comments.list():
        parent_author = comment.parent().author
        comment_author = comment.author
        # An author can be None (the subreddit-network cells below guard
        # against the same case). Such an edge has no usable endpoint and
        # would later put None into the graph, so it is skipped.
        if parent_author is None or comment_author is None:
            continue
        lst.append([parent_author, comment_author])

df_edge = pd.DataFrame(lst, columns=col_edge)

In [None]:
# Preview the first five rows of the edge table.
df_edge.head(5)

In [None]:
# Node table: every distinct value that appears on either end of an edge.
col_node = ['userID']
# Flatten the [parent, child] pairs in a single pass; `sum(lst, [])` would copy
# the growing list once per edge (quadratic in the number of edges).
unique_users = {member for edge in lst for member in edge}
df_node = pd.DataFrame(list(unique_users), columns=col_node)

In [None]:
# Undirected graph of the comment network: nodes from df_node, edges from
# df_edge.
G = nx.Graph()

# dropna() removes entries whose author is missing (None); networkx does not
# accept None as a node, so such rows must not reach the graph.
G.add_nodes_from(df_node['userID'].dropna())
# Feed the (parent, child) pairs in bulk instead of one iterrows() row at a time.
G.add_edges_from(
    df_edge[['parentID', 'commentID']].dropna().itertuples(index=False, name=None)
)

In [None]:
#plt.figure(figsize=(25,25))
#options = {
#    'edge_color': '#FFDEA2',
#    'width': 1,
#    'with_labels': False,
#    'font_weight': 'regular',
#}
#
#pos = nx.kamada_kawai_layout(G)
#nx.draw(G, node_color = 'lightblue', node_size = [G.degree[i]*50 for i in G], pos=pos, **options)
#        
#ax = plt.gca()
#ax.collections[0].set_edgecolor("#555555") 
#plt.show()

<img src="comment_network.png">

### Collect node attributes

User Karma

User Subscription or active subreddits

Edge-wise attributes can be collected by considering common subreddits.


#### 1. Collect Karma

In [None]:
# Karma can be collected by redditor name.
user = reddit.redditor("bboe")
# Displayed as a (comment_karma, link_karma) pair.
user.comment_karma, user.link_karma

In [None]:
# Karma can also be collected by redditor ids (reddit.get function).
# The ids of all moderators of r/redditdev are sent as one comma-separated
# `ids` parameter.

subreddit = reddit.subreddit('redditdev')
user_ids = [moderator.id for moderator in subreddit.moderator()]
ids_param = ','.join(user_ids)
data = reddit.get('api/user_data_by_account_ids', params={'ids': ids_param})
# One line per returned record: name, comment karma, link karma.
for id_, d in data.items():
    line = '{name}: {comment_karma} {link_karma}'.format(**d)
    print(line)

#### 2. Collect the date each account was created

In [None]:
# When were the accounts created?
# Prints "<name>: <creation time>" for every record in `data`.
import time
for id_, d in data.items():
    created = d.get("created_utc")
    # time.ctime(None) formats the *current* time, so a record without a
    # timestamp would silently show "now"; report it as unknown instead.
    created_text = time.ctime(created) if created is not None else 'unknown'
    print('{}: {}'.format(d.get("name"), created_text))

#### 3. Active subreddits per user

In [None]:
#user = praw.models.Redditor(reddit, name="/u/fwork", _data=None)
# Listing for the path below, limited to 100 items; `comments` is bound to the
# iterator over that listing.
comments_path = "user/bboe/comments"
comments = iter(
    praw.models.ListingGenerator(reddit, comments_path, params=None, limit=100)
)

In [None]:
# For every comment in the listing, record the display name of its subreddit.
lst = [comment.subreddit.display_name for comment in comments]

In [None]:
from collections import Counter
# Count how often each subreddit name occurs in `lst`, most frequent first.
Counter(lst).most_common()

## Subreddit Network

In [None]:
## Creating a subreddit network
## connections among subreddits
## how to define edges?

In [None]:
# Rebind `subreddit` to r/programming.
subreddit = reddit.subreddit("programming")

In [None]:
#subreddit = reddit.subreddit('politics')
#   for submission in subreddit.submissions(1478592000, 1478678400):
#       print(submission.title)

##### Weight of edges can be assessed by common users

In [None]:
# Consider 7 different subreddits. For each one, collect the `author` of up to
# 500 "hot" submissions; LST ends up with one author list per subreddit, in the
# order of subreddit_list.
subreddit_list = ["programming", "python", "technology", "coding", "django", "philosophy", "PhilosophyofReligion"]
LST = []
for sr_list in subreddit_list:
    subreddit = reddit.subreddit(sr_list)
    lst = [post.author for post in subreddit.hot(limit=500)]
    LST.append(lst)

In [None]:
LST # displays Redditor objects; they still need to be converted into strings

In [None]:
# LST holds one list of redditor objects per subreddit.
# Convert each inner list into user names, skipping entries whose author is
# None. Iterating over LST directly (instead of a fixed range of 7) keeps this
# correct when subreddit_list changes length.
L = [
    [author.name for author in authors if author is not None]
    for authors in LST
]

In [None]:
from itertools import combinations

# Contingency matrix storing the number of common users for every pair of
# subreddits. It is filled symmetrically; cells that are never assigned
# (including the diagonal) keep the DataFrame's initial missing value.
D = pd.DataFrame(columns=subreddit_list, index=subreddit_list)

# Build each subreddit's user set once instead of once per pair.
user_sets = [set(names) for names in L]

# combinations() yields every unordered pair exactly once, in the same order as
# the nested "i < j" loops, without hard-coding the number of subreddits.
triples = zip(subreddit_list, user_sets, L)
for (name_a, set_a, _), (name_b, _, users_b) in combinations(triples, 2):
    common_users = set_a.intersection(users_b)
    print('Subreddits:({}, {}) \n Len:{} \n Users: {}'.format(name_a, name_b,
                                                              len(common_users), common_users))
    print(20*'-')

    D.at[name_a, name_b] = D.at[name_b, name_a] = len(common_users)

In [None]:
# Display the matrix of common-user counts.
D

##### Edges by X-post
We may also use x-posts. An x-post (cross-post) is a submission that appears in more than one subreddit.

In [None]:
# For each subreddit, search for "x-post" (up to 500 results) and collect the
# `author` of every hit; LST is rebuilt with one list per subreddit.
subreddit_list = ["programming", "python", "technology", "coding", "django", "philosophy", "PhilosophyofReligion"]
LST = []
for sr_list in subreddit_list:
    subreddit = reddit.subreddit(sr_list)
    lst = [hit.author for hit in subreddit.search("x-post", limit=500)]
    LST.append(lst)

In [None]:
import re #library for regular expression

# Matches "r/<name>" where <name> is a run of word characters. The previous
# pattern required one of ] ) / : or whitespace *after* the name, so a
# subreddit mentioned at the very end of a title was never found. The leading
# \b keeps the "r" from matching as the tail of another word.
# Compiled once, outside the loops.
xpost_source = re.compile(r'\br/(\w+)')

# For each subreddit, search for "x-post" and pull the first r/<name> mention
# out of every result title; LST2 gets one list of names per subreddit.
LST2 = []
for sr_list in subreddit_list:
    subreddit = reddit.subreddit(sr_list)

    lst = []
    for i in subreddit.search("x-post", limit=500):
        tmp = xpost_source.search(i.title)
        if tmp is not None:
            lst.append(tmp.group(1))

    LST2.append(lst)

In [None]:
# Display the subreddit names extracted from the x-post titles (one list per
# entry of subreddit_list).
LST2
# Edge weights could also be derived from these x-posts.