In [1]:
import json
import logging
import os
import sys
import time

import cnfg
import tweepy

## Set up Twitter API pull, and enable logging of progress

In [2]:
# Route log records to a file that is truncated on every run ('w');
# only records at ERROR level or above are written.
logging.basicConfig(
    level=logging.ERROR,
    filename='twitter_error_log.log',
    filemode='w',
)

In [3]:
# Credentials are read from a local config file so they never appear in the notebook.
config = cnfg.load(".twitter_config")
consumer_key, consumer_secret = config['consumer_key'], config['consumer_secret']

In [4]:
# Application-only auth. In tweepy 3.x, AppAuthHandler requests the bearer token
# inside its constructor and raises TweepError when the credentials are rejected,
# so that is where an authentication failure has to be caught. (The previous
# `if not api` guard could never fire: a tweepy.API instance is always truthy.)
try:
    auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
except tweepy.TweepError as e:
    logging.error("Can't Authenticate: {0}".format(e))
    print ("Can't Authenticate")
    sys.exit(-1)

# wait_on_rate_limit makes tweepy sleep until the rate-limit window resets instead
# of raising; JSONParser makes every call return plain dicts/lists.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True,
                 parser=tweepy.parsers.JSONParser()
                )

In [5]:
# Twitter API search terms.
# The query is one long boolean expression; it is split across adjacent string
# literals purely for readability (Python joins them into a single string).
climate_search = (
    'globalwarming OR (global AND warming) OR #climate OR '
    '(climate AND (science OR scientist OR scientists OR '
    'alarmist OR alarmists OR change OR realist OR realists OR '
    'denial OR denier OR deniers)) OR '
    'climatechange OR climaterealists OR #agw'
)
searchQuery = climate_search

## Download as many search-relevant tweets from the API as possible, while respecting the rate limit.
This code is largely taken from https://www.karambelkar.info/2015/01/how-to-use-twitters-search-rest-api-most-effectively./  
I tweaked it to write the tweets to a new JSON file each time more than 4000 have accumulated, and to use logging rather than printing.

In [7]:
maxTweets = 2000000000  # Some arbitrary large number
tweetsPerQry = 100  # this is the max the API permits
tweetsPerFile = 4000  # write the in-memory batch to disk once it grows past this
maxConsecutiveErrors = 10  # give up after this many failed queries in a row
retryDelaySecs = 5  # base pause before a retry; scaled by the current failure streak

# If results from a specific ID onwards are reqd, set since_id to that ID.
# else default to no lower limit, go as far back as API allows
sinceId = None

# If results only below a specific ID are reqd, set max_id to that ID.
# else default to no upper limit, start from the most recent tweet matching the search query.
max_id = -1


def _save_tweet_batch(batch, file_index):
    """Dump one batch of tweet dicts to 'tweets<file_index>.json' and log the save.

    The file is opened in text mode: json.dump writes str, which a binary-mode
    handle rejects on Python 3 (text mode also works on Python 2).
    """
    with open('tweets' + str(file_index) + '.json', 'w') as outfile:
        json.dump(batch, outfile)
    logging.error("JSON file saved")


tweetCount = 0         # tweets currently held in memory, not yet written to disk
tweetFiles = 0         # number of JSON files written so far
totalTweets = 0        # exact running total (batches are not exactly tweetsPerFile long)
consecutiveErrors = 0  # length of the current streak of failed queries

# Progress is logged with logging.error because the notebook configures logging
# with level=logging.ERROR; lower-severity records would be dropped.
logging.error("Downloading max {0} tweets".format(maxTweets))
tweets = []
while totalTweets < maxTweets:
    # Page backwards through the results: each query asks only for tweets
    # strictly older than the oldest one seen so far.
    searchArgs = {'q': searchQuery, 'count': tweetsPerQry}
    if max_id > 0:
        searchArgs['max_id'] = str(max_id - 1)
    if sinceId:
        searchArgs['since_id'] = sinceId

    try:
        new_tweets = api.search(**searchArgs)
    except tweepy.TweepError as e:
        consecutiveErrors += 1
        logging.error('Query failed when max_id equaled {0}: {1}'.format(max_id, e))
        if consecutiveErrors >= maxConsecutiveErrors:
            # A persistent failure would otherwise repeat the same query forever.
            logging.error('Giving up after {0} consecutive failed queries'
                          .format(consecutiveErrors))
            break
        time.sleep(retryDelaySecs * consecutiveErrors)
        continue
    consecutiveErrors = 0

    new_tweets = new_tweets['statuses']
    if len(new_tweets) == 0:
        logging.error('No tweets found')
        break

    tweets.extend(new_tweets)
    tweetCount += len(new_tweets)
    totalTweets += len(new_tweets)
    logging.error("Downloaded {0} tweets".format(totalTweets))
    max_id = new_tweets[-1]['id']

    if tweetCount > tweetsPerFile:
        _save_tweet_batch(tweets, tweetFiles)
        tweetFiles += 1
        tweetCount = 0
        del tweets[:]

# Flush whatever is left over from the last, partial batch.
if tweetCount > 0:
    _save_tweet_batch(tweets, tweetFiles)
    tweetFiles += 1
    tweetCount = 0

## Open one file downloaded with the above code and check the result

In [8]:
# Read the first saved batch back in to sanity-check what was written.
with open('tweets0.json', 'r') as json_file:
    test = json.loads(json_file.read())

In [15]:
# Number of tweets stored in the file that was just loaded.
len(test)

200

In [17]:
# Show the first tweet to inspect the structure of a saved record.
test[0]

{u'contributors': None,
 u'coordinates': None,
 u'created_at': u'Mon Apr 03 19:42:19 +0000 2017',
 u'entities': {u'hashtags': [],
  u'media': [{u'display_url': u'pic.twitter.com/BA8OAE8Gy8',
    u'expanded_url': u'https://twitter.com/postgreen/status/848983563664011264/photo/1',
    u'id': 848983535767748608,
    u'id_str': u'848983535767748608',
    u'indices': [114, 137],
    u'media_url': u'http://pbs.twimg.com/tweet_video_thumb/C8gyBxWXgAAFsIf.jpg',
    u'media_url_https': u'https://pbs.twimg.com/tweet_video_thumb/C8gyBxWXgAAFsIf.jpg',
    u'sizes': {u'large': {u'h': 224, u'resize': u'fit', u'w': 400},
     u'medium': {u'h': 224, u'resize': u'fit', u'w': 400},
     u'small': {u'h': 190, u'resize': u'fit', u'w': 340},
     u'thumb': {u'h': 150, u'resize': u'crop', u'w': 150}},
    u'source_status_id': 848983563664011264,
    u'source_status_id_str': u'848983563664011264',
    u'source_user_id': 14493552,
    u'source_user_id_str': u'14493552',
    u'type': u'photo',
    u'url': u'ht

## I can run this periodically to check my rate limit usage

In [12]:
# NOTE(review): these imports sit mid-notebook rather than in the first cell, and
# `requests` is not referenced by name in the cells visible here — confirm it is needed.
import requests
from application_only_auth import Client
# Second client built from the same consumer key/secret as the tweepy handler.
client = Client(consumer_key, consumer_secret)

In [13]:
# Query the rate-limit status endpoint for the help, users, search and statuses
# resource families. The URL is split across two literals only for line length.
response = client.request(
    "https://api.twitter.com/1.1/application/rate_limit_status.json"
    "?resources=help,users,search,statuses"
)

In [14]:
# Display the parsed rate-limit status returned by the request above.
response

{u'rate_limit_context': {u'application': u'<redacted>'},
 u'resources': {u'help': {u'/help/configuration': {u'limit': 15,
    u'remaining': 15,
    u'reset': 1491249531},
   u'/help/languages': {u'limit': 15, u'remaining': 15, u'reset': 1491249531},
   u'/help/privacy': {u'limit': 15, u'remaining': 15, u'reset': 1491249531},
   u'/help/settings': {u'limit': 15, u'remaining': 15, u'reset': 1491249531},
   u'/help/tos': {u'limit': 15, u'remaining': 15, u'reset': 1491249531}},
  u'search': {u'/search/tweets': {u'limit': 450,
    u'remaining': 448,
    u'reset': 1491249455}},
  u'statuses': {u'/statuses/lookup': {u'limit': 300,
    u'remaining': 300,
    u'reset': 1491249531},
   u'/statuses/oembed': {u'limit': 180,
    u'remaining': 180,
    u'reset': 1491249531},
   u'/statuses/retweeters/ids': {u'limit': 300,
    u'remaining': 300,
    u'reset': 1491249531},
   u'/statuses/retweets/:id': {u'limit': 300,
    u'remaining': 300,
    u'reset': 1491249531},
   u'/statuses/show