# Reddit API Query

In [13]:
import requests
import pandas as pd
import datetime as dt
import time
import pickle
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

__I'll be pulling data from two subreddits for this project:__

- [raisedbynarcissists](https://www.reddit.com/r/raisedbynarcissists): "This is a support group for people raised by (or being raised by) a narcissistic parent. Please share your stories, your questions, your histories, your fears and your triumphs. Significant others and friends are all welcome."

- [LifeAfterNarcissism](https://www.reddit.com/r/LifeAfterNarcissism): "A place for those who are moving on from narcissistic family or relationship dynamics."

### 1. Query Reddit

In [2]:
# Custom function to query Reddit (through the Pushshift API) multiple times.
# This code is adapted from code that Hovanes shared in class.

def reddit_query(subreddit, n_samples):
    """Collect submissions from a subreddit by paging through the Pushshift API.

    Each request asks for up to 1000 submissions created before the oldest
    post collected so far, so successive requests walk backwards in time.

    Parameters
    ----------
    subreddit : str
        Name of the subreddit to query (e.g. ``"raisedbynarcissists"``).
    n_samples : int
        Number of submissions to collect. Whole batches are kept, so the
        result may hold more rows than this; it holds fewer only when the
        API returns an empty batch before the target is reached.

    Returns
    -------
    pandas.DataFrame
        One row per submission, built from the dicts in the API's ``data`` list.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    requests.Timeout
        If a request does not complete within the timeout.
    """
    base_url = 'https://api.pushshift.io/reddit/submission/search'  # base url for api
    list_posts = []
    oldest_post = None  # None on the first request: no upper bound on creation time

    # A single request is capped at 1000 posts, so keep paging until enough are collected.
    while len(list_posts) < n_samples:
        params = {
            "subreddit": subreddit,
            "size": 1000,
            "before": oldest_post,  # epoch seconds; only posts created before this are returned
        }

        res = requests.get(base_url, params=params, timeout=30)
        res.raise_for_status()  # fail loudly instead of parsing an error page

        posts = res.json()['data']

        if not posts:
            # Nothing older is available. Stop here: resetting the cursor would
            # re-download the newest posts (duplicates) or loop forever.
            break

        list_posts.extend(posts)

        # Pass the API's own epoch value back as the cursor. Converting it to a
        # local datetime would shift it by the machine's UTC offset and send a
        # date string rather than a timestamp.
        # min() avoids relying on the batch being sorted newest-first.
        oldest_post = min(post["created_utc"] for post in posts)

        if len(list_posts) < n_samples:
            time.sleep(3)  # pause between requests; skipped when no further request follows

    return pd.DataFrame(list_posts)

### Pull data for both subreddits:

In [3]:
# Request 10,000 submissions from r/raisedbynarcissists.
rbn = reddit_query(subreddit="raisedbynarcissists", n_samples=10_000)

In [5]:
# Request 10,000 submissions from r/LifeAfterNarcissism.
lan = reddit_query(subreddit="LifeAfterNarcissism", n_samples=10_000)

### 2. Pickle for later use 

In [14]:
# Save the LifeAfterNarcissism frame. The context manager closes the file
# handle as soon as the dump finishes, so the data is flushed to disk.
with open("../assets/lan.pkl", "wb") as pkl_file:
    pickle.dump(lan, pkl_file)

In [15]:
# Save the raisedbynarcissists frame. The context manager closes the file
# handle as soon as the dump finishes, so the data is flushed to disk.
with open("../assets/rbn.pkl", "wb") as pkl_file:
    pickle.dump(rbn, pkl_file)