# Reddit Query Tool

## What is it?
This tool lets you query information from the [Reddit Pushshift API](https://github.com/pushshift/api). With this tool you can:

- Explore how often Reddit users have posted about a given topic
- Find sample posts about a given topic
- Get top-level metrics about Reddit posts

## How can I use it?
To begin, hit "Cell" -> "Run all" to run the tool:
https://screenshot.googleplex.com/mPuJGiqfKMK.png

Fill out the form below. All fields are optional.

Once ready, hit "Go!"

## More info
For questions, comments, feature requests, more information, or a friendly chat, reach out to zmpollack@.

In [None]:
import requests
import json
import time
import os
import urllib
import csv
import ipywidgets as widgets
from IPython.display import display
from IPython.display import HTML

# Taken from https://stackoverflow.com/questions/31517194/how-to-hide-one-specific-cell-input-or-output-in-ipython-notebook
# Injects a small script plus a link into the cell output. The script defines
# code_toggle(), which alternately hides and shows the `div.input` element of
# the currently selected code cell, and schedules one call on document-ready.
# Because code_show starts as true, that first call hides the input.
# The script uses `$`, so it presumably relies on the notebook front end
# providing jQuery -- confirm for the target environment.
tag = HTML('''<script>
code_show=true; 
function code_toggle() {
    if (code_show){
        $('div.cell.code_cell.rendered.selected div.input').hide();
    } else {
        $('div.cell.code_cell.rendered.selected div.input').show();
    }
    code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
To show/hide this cell's raw code input, click <a href="javascript:code_toggle()">here</a>.''')
display(tag)

# Module-level query state. Every filter starts out as an empty string and the
# output mode starts as plain text; updateVars() overwrites all of them from
# the form widgets each time the form is submitted.

# Free-text filters.
ids = q = q_not = title = titlenot = ''
# Result size and origin filters.
size = author = subreddit = ''
# Date range bounds.
after = before = ''
# Numeric threshold filters.
score = num_comments = ''
# Ordering options.
sort = sort_type = ''
# Output mode: 'Text' or 'csv'.
is_csv = 'Text'

def _text_input(description, placeholder):
    """Build an empty, enabled single-line text field.

    Args:
        description: Label shown next to the field.
        placeholder: Greyed-out example text shown while the field is empty.

    Returns:
        A new ``widgets.Text`` instance.
    """
    return widgets.Text(
        value='',
        # 'initial' lets the label use its natural width; several labels in
        # this form are long sentences.
        style={'description_width': 'initial'},
        placeholder=placeholder,
        description=description,
        disabled=False,
    )


def _radio_input(description, options, value):
    """Build an enabled radio-button group.

    Args:
        description: Label shown next to the group.
        options: Choices offered, in display order.
        value: The choice selected initially; must be one of ``options``.

    Returns:
        A new ``widgets.RadioButtons`` instance.
    """
    return widgets.RadioButtons(
        options=options,
        style={'description_width': 'initial'},
        value=value,
        description=description,
        disabled=False,
    )


# --- Free-text search filters -------------------------------------------
id_widget = _text_input('Post IDs', 'abcd, wxyz, qwerty')
q_widget = _text_input('Search term', 'android')
# Label typo fixed: was 'Exlude search term'.
q_not_widget = _text_input('Exclude search term', 'robots')
title_widget = _text_input('Title keyword', 'battery')
titlenot_widget = _text_input('Exclude keyword in title', 'iphone')

# --- Result size and origin filters -------------------------------------
size_widget = _text_input('# posts returned, between 0 - 500', '20')
author_widget = _text_input('Post author', 'sundar')
subreddit_widget = _text_input('Subreddit. Do not include "r/"', 'google')

# --- Date range (converted to epoch seconds by convertTime) -------------
after_widget = _text_input('Start date. Format mm/dd/yyyy', '09/27/2019')
before_widget = _text_input('End date. Format mm/dd/yyyy', '12/27/2019')

# --- Numeric threshold filters ------------------------------------------
score_widget = _text_input('Score', '>100')
num_comments_widget = _text_input('# Comments', '>10')

# --- Ordering and output format -----------------------------------------
sort_widget = _radio_input('Order by', ['desc', 'asc'], 'desc')
sort_type_widget = _radio_input(
    'Sort by', ['score', 'num_comments', 'created_utc'], 'score')
# Label typo fixed: was 'Output formated as text or csv'.
is_csv_widget = _radio_input(
    'Output formatted as text or csv', ['csv', 'Text'], 'Text')


# Submit button and the output area that onSubmit prints into.
submit_widget = widgets.Button(description="Go!")
output = widgets.Output()

def _date_to_epoch(date_text, pattern='%m/%d/%Y'):
    """Return midnight UTC of ``date_text`` as a string of Unix seconds."""
    # Imported here because calendar is not among the module-level imports.
    import calendar

    # calendar.timegm reads the struct_time as UTC whatever the machine's
    # local zone is. time.mktime reads it as local time, and assigning
    # os.environ['TZ'] does not change that unless time.tzset() is called.
    return str(calendar.timegm(time.strptime(date_text, pattern)))


def convertTime():
    """Convert the global ``after`` / ``before`` dates to epoch-second strings.

    Each non-empty value is parsed as ``mm/dd/yyyy`` and replaced in place by
    the Unix timestamp (as a decimal string) of midnight UTC on that date.
    Empty values are left untouched. The process environment is no longer
    modified, since the conversion does not depend on the ``TZ`` variable.

    Raises:
        ValueError: If a non-empty value does not match ``mm/dd/yyyy``.
    """
    global after
    global before

    if after != '':
        after = _date_to_epoch(after)
    if before != '':
        before = _date_to_epoch(before)

def updateVars():
    """Copy the current value of every form widget into its query global.

    Reads ``.value`` from each input widget and stores it in the module-level
    variable of the matching name. Nothing is validated or converted here.
    """
    global ids, q, q_not, title, titlenot
    global size, author, subreddit
    global after, before
    global score, num_comments
    global sort, sort_type, is_csv

    # Free-text search filters.
    ids, q, q_not = id_widget.value, q_widget.value, q_not_widget.value
    title, titlenot = title_widget.value, titlenot_widget.value

    # Result size and origin filters.
    size, author, subreddit = (
        size_widget.value,
        author_widget.value,
        subreddit_widget.value,
    )

    # Date bounds are stored exactly as typed.
    after, before = after_widget.value, before_widget.value

    # Numeric threshold filters.
    score, num_comments = score_widget.value, num_comments_widget.value

    # Ordering and output format.
    sort, sort_type = sort_widget.value, sort_type_widget.value
    is_csv = is_csv_widget.value
    
    
def createRequest():
    """Build the Pushshift submission-search URL from the query globals.

    Every parameter is emitted, in a fixed order, even when its value is
    empty, followed by ``metadata=true``. All values are percent-encoded with
    ``quote_plus`` so characters such as ``&``, ``#``, ``>`` or spaces cannot
    break the query string. Parameter names (including ``q:not`` and
    ``title:not``) are written verbatim.

    The globals are only read, never modified, so calling this function
    repeatedly yields the same URL instead of encoding the text again.

    Returns:
        The complete request URL as a string.
    """
    # Imported explicitly: a bare `import urllib` does not by itself
    # guarantee that the `urllib.parse` submodule has been loaded.
    from urllib.parse import quote_plus

    params = (
        ('size', size),
        ('ids', ids),
        ('q', q),
        ('q:not', q_not),
        ('title', title),
        ('title:not', titlenot),
        ('author', author),
        ('subreddit', subreddit),
        ('after', after),
        ('before', before),
        ('score', score),
        ('num_comments', num_comments),
        ('sort', sort),
        ('sort_type', sort_type),
    )
    query = '&'.join(
        '%s=%s' % (name, quote_plus(value)) for name, value in params
    )

    return (
        'https://api.pushshift.io/reddit/search/submission/?'
        + query
        + '&metadata=true'
    )
    
    
def getResponse(request, timeout=30):
    """Fetch ``request`` with an HTTP GET and return the decoded JSON body.

    Args:
        request: The URL to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server would block the caller forever.

    Returns:
        The object produced by parsing the response body as JSON.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
        ValueError: If the response body is not valid JSON.
    """
    r = requests.get(request, timeout=timeout)
    result = json.loads(r.content)
    return result
    
    
def formatResponse(args):
    """Render an API response as a human-readable text report.

    Args:
        args: Decoded response. Expected to hold ``metadata`` (with
            ``total_results`` and ``results_returned``) and ``data``, a list
            of post dicts with ``title``, ``author``, ``subreddit``,
            ``full_link``, ``num_comments`` and ``score``.

    Returns:
        An "Info" section followed by one section per post. At most
        ``results_returned`` posts are listed, and never more than ``data``
        actually contains. If the response lacks an expected key or has an
        unexpected shape, the text ``no results`` followed by the raw
        response is returned instead.
    """
    # Spelled out with explicit escapes so the emitted text does not depend
    # on how this source file happens to be indented.
    info_template = (
        "\n"
        "    **Info** \n \n\n"
        "    Total submissions: %s \n\n"
        "    Posts returned: %s \n\n"
        "    *************************** \n\n"
        "    "
    )
    post_template = (
        "\n"
        "        Title : %s \n\n"
        "        Author: %s \n\n"
        "        Subreddit: %s \n\n"
        "        Link: %s \n\n"
        "        Number of comments: %s \n\n"
        "        Score: %s \n\n"
        "        *************************** \n\n"
        "        "
    )

    try:
        metadata = args['metadata']
        total_results = metadata['total_results']
        results_returned = metadata['results_returned']
        # Slice rather than index up to the reported count: a count larger
        # than the list must not raise IndexError.
        posts = args['data'][:results_returned]
        sections = [
            post_template % (
                post['title'],
                post['author'],
                post['subreddit'],
                post['full_link'],
                post['num_comments'],
                post['score'],
            )
            for post in posts
        ]
    except (KeyError, TypeError):
        return 'no results\n%s' % (args,)

    metadata_str = info_template % (total_results, results_returned)
    data_str = '**Posts** \n \n' + ''.join(sections)

    return metadata_str + data_str

def _tsv_cell(value):
    """Return ``value`` as text that is safe inside one tab-separated cell."""
    # A tab or line break inside a value would split the row when pasted,
    # so each becomes a space; trailing whitespace is then dropped.
    text = str(value).replace('\t', ' ').replace('\r', ' ').replace('\n', ' ')
    return text.rstrip()


def csvResponse(args):
    """Render an API response as tab-separated rows for a spreadsheet.

    Args:
        args: Decoded response. Expected to hold ``metadata`` (with
            ``results_returned``) and ``data``, a list of post dicts with
            ``title``, ``author``, ``subreddit``, ``full_link``,
            ``num_comments`` and ``score``.

    Returns:
        Two instruction lines, a header row and one row per post. At most
        ``results_returned`` posts are listed, and never more than ``data``
        actually contains. If the response lacks an expected key or has an
        unexpected shape, the text ``no results`` followed by the raw
        response is returned, so the caller always receives a printable
        string.
    """
    instructions = (
        "\n"
        "        Copy the below, paste without formatting into a Google Sheet \n\n"
        "        ****************************************************************\n\n"
    )
    # The header and every row start at column 0 with no padding after the
    # tabs, so no cell picks up stray leading spaces when pasted.
    header = "title\tauthor\tsubreddit\tfull link\tnumber of comments\tscore\n"
    columns = (
        'title', 'author', 'subreddit', 'full_link', 'num_comments', 'score',
    )

    try:
        results_returned = args['metadata']['results_returned']
        # Slice rather than index up to the reported count: a count larger
        # than the list must not raise IndexError.
        posts = args['data'][:results_returned]
        rows = [
            '\t'.join(_tsv_cell(post[column]) for column in columns) + '\n'
            for post in posts
        ]
    except (KeyError, TypeError):
        return 'no results\n%s' % (args,)

    return instructions + header + ''.join(rows)

def onSubmit(b):
    """Click handler for the submit button: run the query and print the result.

    Args:
        b: The object passed by the click callback; unused.
    """
    # Everything printed inside this context goes to the `output` widget.
    with output:
        updateVars()
        convertTime()
        response = getResponse(createRequest())
        # Choose the renderer from the selected output format.
        render = csvResponse if is_csv == 'csv' else formatResponse
        print(render(response))

# Render the form top to bottom: the filter inputs, the ordering and format
# choices, the submit button, and finally the area that receives the results.
display(
    id_widget, q_widget, q_not_widget, title_widget, titlenot_widget,
    size_widget, author_widget, subreddit_widget,
    after_widget, before_widget,
    score_widget, num_comments_widget,
    sort_widget, sort_type_widget, is_csv_widget,
    submit_widget,
    output,
)

In [None]:
# Taken from https://stackoverflow.com/questions/31517194/how-to-hide-one-specific-cell-input-or-output-in-ipython-notebook
# Same show/hide snippet as in the first cell, applied to this cell: it
# defines code_toggle(), which alternately hides and shows the `div.input`
# element of the selected code cell, and calls it once on document-ready.
# Because code_show starts as true, that first call hides the input.
tag = HTML('''<script>
code_show=true; 
function code_toggle() {
    if (code_show){
        $('div.cell.code_cell.rendered.selected div.input').hide();
    } else {
        $('div.cell.code_cell.rendered.selected div.input').show();
    }
    code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
To show/hide this cell's raw code input, click <a href="javascript:code_toggle()">here</a>.''')
display(tag)

# Wire the "Go!" button to the query handler.
submit_widget.on_click(onSubmit)

In [None]:
# Registers onSubmit as the button's click handler. The previous cell makes
# the same call, so this looks redundant when both cells are run.
# NOTE(review): confirm whether this cell is still needed.
submit_widget.on_click(onSubmit)