# Search for tweets

### Import libraries

In [1]:
import numpy as np
import pandas as pd
import twitter
import time
import collections
from datetime import datetime, timedelta
from pytz import timezone

### Bring your twitter's keys

In [2]:
# Call your twitter keys
keys = ! ../keys/twitter_config.py

# Save keys to use them later
api = twitter.Api(consumer_key = keys[0],
                  consumer_secret = keys[1],
                  access_token_key = keys[2],
                  access_token_secret = keys[3],
                  sleep_on_rate_limit=True #
                 )

### Parameters

In [3]:
# twitter accounts
accounts = ("CanadianPM", "Canada", "OpenGovCan", "GovCanHealth")

# dates
# format YYYY-MM-DD, and has a 7-day limit (no tweets will be found 
# for a date older than one week.)
today = datetime.now()#timezone('US/Pacific'))
dates = list()
for d in range(7, -2, -1):
    aux = today - timedelta(days=d)
    dates.append(aux.strftime("%Y-%m-%d"))

today = today.strftime("%Y-%m-%d")

# tweets
num_tweets = 100
max_tweets = 180
time_epoc = 900 # 15 minutes = 900 seconds
time_waiting = time_epoc / max_tweets

### Restrictions
The standard API rate limits described in the [Twitter developer's documents](https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets) related with GET (read) endpoints are shown in the next table.  

Note that endpoints not listed in the chart default to 15 requests per allotted user. All request windows are *15 minutes in length*.  These rate limits apply to the standard API endpoints only.

| Endpoint | Resource family | Requests / window (user auth) | Requests / window (app auth) |
|---|:--:|:---:|:---:|
|GET search/tweets | search | 180 | 450 |

Also, the search index has a 7-day limit, which means that no tweets will be found for a date older than one week.

### Retrieve tweets

In [4]:
# dictionary of dictionaries
results = collections.defaultdict(dict)
print("--- Retrieving tweets ---")
print("\nDATE       : ACCOUNT")
print("---------- : -------")
for ac in accounts:
    for d in range(0, len(dates)-1):
#     for da in dates:
        print(dates[d] + " : " + ac)
        results[ac][dates[d]] = api.GetSearch(
            raw_query = "q=" + ac + 
                        "%20&until=" + dates[d+1] +
                        "&count=" + str(num_tweets)
        )
        time.sleep(time_waiting)
print("\n--- Completed ---")

--- Retrieving tweets ---

DATE       : ACCOUNT
---------- : -------
2020-08-06 : CanadianPM
2020-08-07 : CanadianPM
2020-08-08 : CanadianPM
2020-08-09 : CanadianPM
2020-08-10 : CanadianPM
2020-08-11 : CanadianPM
2020-08-12 : CanadianPM
2020-08-13 : CanadianPM
2020-08-06 : Canada
2020-08-07 : Canada
2020-08-08 : Canada
2020-08-09 : Canada
2020-08-10 : Canada
2020-08-11 : Canada
2020-08-12 : Canada
2020-08-13 : Canada
2020-08-06 : OpenGovCan
2020-08-07 : OpenGovCan
2020-08-08 : OpenGovCan
2020-08-09 : OpenGovCan
2020-08-10 : OpenGovCan
2020-08-11 : OpenGovCan
2020-08-12 : OpenGovCan
2020-08-13 : OpenGovCan
2020-08-06 : GovCanHealth
2020-08-07 : GovCanHealth
2020-08-08 : GovCanHealth
2020-08-09 : GovCanHealth
2020-08-10 : GovCanHealth
2020-08-11 : GovCanHealth
2020-08-12 : GovCanHealth
2020-08-13 : GovCanHealth

--- Completed ---


In [5]:
print("accounts:", accounts)
print("dates:", dates[:-1])

accounts: ('CanadianPM', 'Canada', 'OpenGovCan', 'GovCanHealth')
dates: ['2020-08-06', '2020-08-07', '2020-08-08', '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13']


In [6]:
## Uncomment and run this cell to see the retrieved results
# results

#### Example:

In [7]:
# First tweet by the public to the 'Canada' account on the last day of the period.
print('Account: Canada')
print('Date:', today)
results['CanadianPM'][today][0]

Account: Canada
Date: 2020-08-13


Status(ID=1293779114701316096, ScreenName=19blocksup, Created=Thu Aug 13 05:19:12 +0000 2020, Text='@GovCanHealth @CPHO_Canada @CanadianPM Dr. Sanjay Gupta: Why I am not sending my kids back to school\nhttps://t.co/dJ8tdCX7Ih')

### Load dictionaries (if exist) and save tweets

In [8]:
# Load 
#  - retrieve dictionaries if they exist,
#  - if they doesn't exist, it creates them,
#  - finally adds the tweets from the last 7 days.
results_all = collections.defaultdict(dict)
for ac in accounts: 
    try:
        results_all[ac] = np.load('../tweets/tweets_' +ac+ '.npy',allow_pickle='TRUE').item()
    except:
        next
    for d in range(0, len(dates)-1):
        results_all[ac][dates[d]] = results[ac][dates[d]]

In [9]:
## Uncomment and run this cell to see all the retrieved results over time
# results_all

In [10]:
# Save
for ac in accounts:
    np.save('../tweets/tweets_' + ac + '.npy', results_all[ac])