# Twitter Getter
**This notebook illustrates three different methods of requesting Twitter data**
1. Using OAuth2 to pull data from Twitter's standard search API in JSON format
2. Using Tweepy to pull data from Twitter's standard search API into a SQLite database
3. Using Tweepy to pull live data from Twitter's streaming API in JSON format

### Before getting started, generate the necessary keys and access tokens
You will need to create a Twitter account and generate your own keys at [apps.twitter.com](https://apps.twitter.com)

In [None]:
# After generating your keys, add them here
# You will not be able to connect to Twitter's API without them
# API_KEY / API_SECRET are used as the consumer (application) credentials and
# TOKEN_KEY / TOKEN_SECRET as the access-token credentials by the cells below.
# NOTE(review): avoid committing real credentials with this notebook.

API_KEY = 'INSERT YOUR KEY HERE'
API_SECRET = 'INSERT YOUR KEY HERE'
TOKEN_KEY = 'INSERT YOUR KEY HERE'
TOKEN_SECRET = 'INSERT YOUR KEY HERE'

## 1. Using OAuth2 to pull data from Twitter's standard search API in JSON format

In [None]:
import ast
import json
import urllib
import urllib.parse

import oauth2
import pandas as pd

def oauth_req(url, key, secret, http_method="GET", post_body=b"", http_headers=None):
    """Send an OAuth-signed request to ``url`` and return the response body.

    The request is signed with the module-level API_KEY / API_SECRET as the
    consumer credentials and ``key`` / ``secret`` as the access token.

    Args:
        url: Full URL of the API endpoint, including any query string.
        key: Access token key (e.g. TOKEN_KEY).
        secret: Access token secret (e.g. TOKEN_SECRET).
        http_method: HTTP method passed to the client.
        post_body: Request body passed to the client.
        http_headers: Optional headers passed to the client.

    Returns:
        The content part of what ``oauth2.Client.request`` returns; the
        response object itself is discarded.
    """
    signed_client = oauth2.Client(
        oauth2.Consumer(key=API_KEY, secret=API_SECRET),
        oauth2.Token(key=key, secret=secret),
    )
    _response, body = signed_client.request(
        url,
        method=http_method,
        body=post_body,
        headers=http_headers,
    )
    return body

search = "INSERT YOUR SEARCH TERM(S) HERE" # put your search term(s) inside the quotes before running this cell

# Build the request URL; the search term is percent-encoded (safe='' also
# encodes '/') so it can be placed directly in the query string.
search_endpoint = 'https://api.twitter.com/1.1/search/tweets.json?q='
formatted_search = urllib.parse.quote(search, safe='')
url = search_endpoint + formatted_search + '&lang=en&result_type=popular&tweet_mode=extended'
data = oauth_req(url, TOKEN_KEY, TOKEN_SECRET)

# writing the retrieved data to a JSON file (raw bytes, exactly as received)
with open('tweet_data.json', 'wb') as data_file:
    data_file.write(data)

# loading the JSON data to make sure there was nothing wrong with the file we just created
# The file is read inside a `with` block so the handle is closed, and decoded
# explicitly as UTF-8 (the standard JSON encoding) instead of relying on the
# platform's default encoding.
with open('tweet_data.json', encoding='utf-8') as json_file:
    open_json = json_file.read()
json_data = json.loads(open_json)

# create a Pandas dataframe from the JSON for easier examination and manipulation
tweetDF = pd.DataFrame(json_data['statuses'])

# take a peek at the dataframe
tweetDF.head()

### Print the username and full text of the Tweets we just collected
Twitter's standard search API defaults to a maximum of 15 records per query

In [None]:
# Walk the tweet text and user columns in lockstep and print one entry per Tweet
for full_text, user in zip(tweetDF.full_text, tweetDF.user):
    print("@" + user['screen_name'] + " - " + full_text + "\n____________________\n")

## 2. Using Tweepy to pull data from Twitter's standard search API into a SQLite database

In [None]:
import dataset
import tweepy
import sqlite3

def store_tweet(item):
    """Insert one tweet into the "tweets" table of newTweets.db.

    Connects to the SQLite database, obtains the "tweets" table through
    ``create_table`` (created without a primary id column), and inserts the
    tweet's JSON payload as a single row.

    Args:
        item: Object exposing the tweet's parsed JSON as a dict in ``_json``.
    """
    database = dataset.connect('sqlite:///newTweets.db')
    tweets_table = database.create_table('tweets', primary_id=False)
    # Nested dictionaries are stored as their string representation;
    # every other value is passed through unchanged.
    record = {
        field: str(value) if isinstance(value, dict) else value
        for field, value in item._json.items()
    }
    tweets_table.insert(record)
    

# authenticate with the application's key and secret only (tweepy's AppAuthHandler)
auth = tweepy.AppAuthHandler(API_KEY, API_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

query = 'INSERT YOUR SEARCH TERM(S) HERE' # put your search term(s) inside the quotes before running this cell

# executing the query with specified settings
cursor = tweepy.Cursor(api.search, q=query, lang='en', result_type='popular')

# iterating through 5 pages of records to add each record to our database
for page in cursor.pages(5):
    for item in page:
        store_tweet(item)

# use SQLite3 to connect to the database we just put data into
conn = sqlite3.connect('newTweets.db')
try:
    # create a Pandas dataframe from a SQL query of our newTweets database
    tweepyDF = pd.read_sql_query('SELECT * FROM tweets LIMIT 15;', conn)
finally:
    # always release the connection, even if the query fails
    conn.close()

# take a peek at the dataframe
tweepyDF.head()

### Print the username and text from the first 15 Tweets in our new database
Our SQL query had a limit of 15, so tweepyDF only contains 15 records

In [None]:
# The 'user' column looks like a dictionary, but is formatted as a string
# (store_tweet saves nested dictionaries as str(dict)), so it is parsed back
# into a dictionary to access the user's screen name.
# ast.literal_eval only accepts Python literals, whereas eval() would execute
# any expression found in the stored text -- and that text originates from Twitter.
# Values that are already dictionaries are kept as-is so this cell can be re-run.
tweepyDF['user'] = tweepyDF['user'].map(
    lambda x: x if isinstance(x, dict) else dict(ast.literal_eval(x))
)

# head(15) caps the output at 15 rows without failing when the query
# returned fewer than 15 Tweets
first_rows = tweepyDF.head(15)
for user, text in zip(first_rows['user'], first_rows['text']):
    print("@" + user['screen_name'] + " - " + text + "\n____________________\n")

## 3. Using Tweepy to pull live data from Twitter's streaming API in JSON format

In [None]:
from tweepy.streaming import StreamListener 
from tweepy import OAuthHandler, Stream
import time

class Listener(StreamListener):
    """Stream listener that appends incoming data to tweetStream.json for a limited time.

    Args:
        time_limit: Number of seconds, measured from construction, during
            which incoming data is written to the file.
    """

    def __init__(self, time_limit=60):
        # Run the base-class initialiser so that any state StreamListener
        # sets up for itself exists on this instance as well.
        super().__init__()
        # Start the timer and open tweetStream.json in append mode
        # (the file is created if it doesn't already exist)
        self.start_time = time.time()
        self.limit = time_limit
        self.saveFile = open('tweetStream.json', 'a')

    def on_data(self, data):
        """Write ``data`` to the file while the timer is running.

        Returns:
            True while the time limit has not been reached. Once it has, the
            file is closed and False is returned.
            NOTE(review): tweepy 3.x is expected to stop the stream when
            on_data returns False -- confirm against the installed version.
        """
        if (time.time() - self.start_time) < self.limit:
            self.saveFile.write(data)
            return True
        self.saveFile.close()
        return False
        
# authenticate with both the application credentials and the access token
auth = OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(TOKEN_KEY, TOKEN_SECRET)

stream = Stream(auth, Listener()) # starting up the stream
stream.filter(track=['Warzone']) # telling the stream what to track

# load the data we just collected into tweets_data
tweets_data = []
with open('tweetStream.json', 'r') as tweets_file:
    for line in tweets_file:
        # json.loads raises on an empty string, so blank lines (e.g. stray
        # line terminators between records) are skipped instead of aborting the load
        if not line.strip():
            continue
        tweet = json.loads(line)
        tweets_data.append(tweet)

# turn tweets_data into a pandas dataframe and take a peek at the results
streamDF = pd.DataFrame(tweets_data)
streamDF.head()

### Print the username and text from the first 15 streaming Tweets

In [None]:
# head(15) caps the output at 15 rows without failing when the stream
# collected fewer than 15 Tweets during the time limit
first_rows = streamDF.head(15)
for user, text in zip(first_rows['user'], first_rows['text']):
    print("@" + user['screen_name'] + " - " + text + "\n____________________\n")