# Retrieving Tweets

The purpose of this notebook is to prototype and test code for retrieving tweets with Tweepy, storing them in a MongoDB database, and reading them back.

In [1]:
# Imports
import logging
import json
import tweepy
import random
import time

from pprint import pprint
from pymongo import MongoClient

# Settings: emit INFO-and-above records as "<timestamp> : <level> : <message>"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)

In [2]:
# Authentication: the API secrets live in a JSON file rather than in the notebook
with open('credentials.json') as creds:
    credentials = json.load(creds)

# Pull the four secrets out of the loaded mapping in one pass
(consumer_key,
 consumer_secret,
 access_token,
 access_token_secret) = (
    credentials[field]
    for field in ('consumer_key', 'consumer_secret',
                  'access_token', 'access_token_secret')
)

# Build the OAuth handler, attach the access token, and create the API client
auth = tweepy.OAuthHandler(consumer_key=consumer_key, consumer_secret=consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# User info: fetch the authenticated account and show its name
user = api.me()
print(user.name)

wildlyclassyprince


In [3]:
# Open a MongoDB client; no arguments are passed, so the driver's defaults apply
client = MongoClient()

# Handle to the database named 'TweetDetails' (same object as client.TweetDetails)
db = client['TweetDetails']

In [4]:
# Random number generator
def id_gen():
    '''Returns a pseudo-random integer between 10,000 and 100,000, both inclusive.'''
    lower_bound = 10 ** 4
    upper_bound = 10 ** 5
    # randint treats both bounds as inclusive
    return random.randint(lower_bound, upper_bound)

# Rate limiter
def limit_handler(cursor, wait_seconds=15 * 60):
    '''Yields items from a cursor, pausing whenever the API rate limit is hit.

    cursor: an iterator exposing a `next()` method, e.g. the result of
        `tweepy.Cursor(...).items()`.
    wait_seconds: how long to sleep after a `tweepy.RateLimitError` before
        asking the cursor for its next item again. Defaults to 15 minutes.

    Yields whatever `cursor.next()` returns, in order, and finishes when the
    cursor raises `StopIteration`. Other exceptions propagate to the caller.
    '''
    while True:
        try:
            # Only the fetch is guarded; the yield happens outside the try.
            item = cursor.next()
        except tweepy.RateLimitError:
            # Back off, then retry. This repeats for as long as the limit persists.
            print('Waiting for {:g} minutes ...'.format(wait_seconds / 60.0))
            time.sleep(wait_seconds)
            print('Continuing ...')
            continue
        except StopIteration:
            # Cursor exhausted: end the generator cleanly.
            return
        yield item

# Query and insertion function
def search_retrieve_insert(query):
    '''Searches Twitter for `query` and stores one document per matching tweet.

    Every tweet obtained through `api.search` is written to the
    `TweetDetails` collection along with the query that produced it and an
    `id` taken from `id_gen()`. Any exception is caught and its message is
    printed, so nothing is raised to the caller and nothing is returned.
    '''
    try:
        # .items() is called without a count; iteration goes through
        # limit_handler, which deals with rate-limit errors from the cursor.
        matches = tweepy.Cursor(api.search, query).items()
        for status in limit_handler(matches):
            # One flat document per tweet: tweet fields plus author profile fields
            record = {
                'id': id_gen(),
                'query': query,
                'text': status.text,
                'created_at': status.created_at,
                'full_name' : status.author.name,
                'screen_name': status.author.screen_name,
                'location': status.author.location,
                'description': status.author.description,
                'source': status.source,
                'statuses_count': status.user.statuses_count,
                'retweet_count': status.retweet_count,
                'favorited': status.favorited
            }
            db.TweetDetails.insert_one(record)
    except Exception as error:
        print(str(error))
        
# Reading records
def read():
    '''Pretty-prints every document found in the `TweetDetails` collection.

    Exceptions raised while querying or printing are caught and their
    message is printed instead of propagating.
    '''
    try:
        # find() with no filter: iterate over whatever the collection returns
        for document in db.TweetDetails.find():
            pprint(document)
    except Exception as error:
        print(str(error))

In [None]:
# Insertion: search for tweets matching 'Zimbabwe' and store each result in the TweetDetails collection
search_retrieve_insert(query='Zimbabwe')

In [None]:
# Reading: pretty-print every document currently in the TweetDetails collection
read()