In [1]:
# For sending GET requests to the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [2]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) into the process environment
# before the token is looked up.
load_dotenv()

# Bearer token for the API; None when the TOKEN variable is not defined.
bearer_token = os.getenv("TOKEN")

In [3]:
# SECURITY: a real bearer token used to be hardcoded on this line. Secrets must
# never be stored in a notebook (they leak through version control and shared
# copies); the previously exposed token should be revoked and regenerated.
# Provide the token through the environment instead, e.g. a `TOKEN=...` entry
# in the .env file that load_dotenv() reads.
if not os.environ.get("TOKEN"):
    # Fail fast with a clear message instead of sending unauthenticated requests.
    raise RuntimeError(
        "TOKEN is not set: define it in the environment or in a .env file "
        "before running this notebook."
    )

In [4]:
def auth():
    """Return the bearer token stored in the TOKEN environment variable.

    Returns:
        The token string, or None when TOKEN is not set.
    """
    return os.getenv("TOKEN")

In [5]:
def bearer_oauth(r):
    """
    Attach bearer-token credentials to an outgoing request.

    Meant to be handed to requests as the ``auth=`` callable. Reads the
    module-level ``bearer_token`` at call time, sets the Authorization and
    User-Agent headers on ``r`` and returns the same object.
    """
    auth_headers = {
        "Authorization": f"Bearer {bearer_token}",
        "User-Agent": "v2RecentSearchPython",
    }
    for header_name, header_value in auth_headers.items():
        r.headers[header_name] = header_value
    return r

In [6]:
def create_headers(bearer_token):
    """Build the HTTP headers that authenticate a request with a bearer token.

    Args:
        bearer_token: Token placed after the "Bearer " prefix.

    Returns:
        A dict with a single "Authorization" entry.
    """
    authorization = f"Bearer {bearer_token}"
    return {"Authorization": authorization}

In [7]:
def create_url(keyword, start_date, end_date, next_token, max_results=None):
    """Build the endpoint URL and query parameters for one search request.

    The time window and pagination token come from the arguments, so the
    function no longer depends on a surrounding loop's variables.

    Args:
        keyword: Search query string.
        start_date: Start of the time window (timestamp string).
        end_date: End of the time window (timestamp string).
        next_token: Pagination token from the previous response, or None for
            the first page.
        max_results: Tweets requested per call. When omitted, the notebook-level
            ``max_results`` variable is used if it exists, otherwise 100 (the
            value this notebook configures).

    Returns:
        A ``(search_url, query_params)`` tuple.
    """
    search_url = "https://api.twitter.com/2/tweets/search/recent" #Change to the endpoint you want to collect data from

    if max_results is None:
        # Backward compatible with callers that only set the global page size.
        max_results = globals().get("max_results", 100)

    #change params based on the endpoint you are using
    query_params = {'query': keyword,
                    'start_time': start_date,
                    'end_time': end_date,
                    'max_results': max_results,
                    'tweet.fields': 'created_at',
                    'next_token': next_token,
                    }

    return (search_url, query_params)

In [8]:
def connect_to_endpoint(url, headers, params, next_token):
    """Send one GET request to the endpoint and return the decoded JSON body.

    Args:
        url: Endpoint URL (first element of the tuple from create_url).
        headers: HTTP headers carrying the bearer token, as built by
            create_headers. When empty or None, authentication falls back to
            the bearer_oauth hook.
        params: Query parameters (second element of the tuple from create_url).
            The caller's dict is not modified.
        next_token: Pagination token for this request, or None for the first
            page.

    Returns:
        The parsed JSON response.

    Raises:
        Exception: with ``(status_code, response_text)`` as args when the
            status code is not 200. The request may also raise the HTTP
            library's timeout error if no answer arrives within 30 seconds.
    """
    # Work on a copy so the caller's params dict is left untouched.
    query = dict(params)
    query['next_token'] = next_token   # requests omits parameters whose value is None

    request_kwargs = {"params": query, "timeout": 30}
    if headers:
        # Honor the caller's headers; keep the User-Agent the auth hook used to set
        # unless the caller supplies their own.
        request_kwargs["headers"] = {"User-Agent": "v2RecentSearchPython", **headers}
    else:
        request_kwargs["auth"] = bearer_oauth

    response = requests.get(url, **request_kwargs)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [9]:
def append_to_csv(json_response, fileName):
    """Append the tweets of one API response to a CSV file.

    Each tweet becomes one row: ``created_at`` (parsed into a datetime),
    ``id`` and ``text``. Every tweet must carry those three keys.

    Args:
        json_response: Decoded API response. Its ``'data'`` list holds the
            tweets; a response without ``'data'`` (or with an empty list)
            appends nothing instead of raising.
        fileName: Path of the CSV file; opened in append mode and created if
            it does not exist.

    Returns:
        None. The number of rows written is printed.
    """
    # Be defensive about a missing 'data' key (e.g. a page with no tweets).
    tweets = json_response.get('data') or []

    # Build every row first so a malformed tweet cannot leave a half-written page.
    rows = [
        [dateutil.parser.parse(tweet['created_at']), tweet['id'], tweet['text']]
        for tweet in tweets
    ]

    # The context manager closes the file even if writing fails.
    with open(fileName, "a", newline="", encoding='utf-8') as csvFile:
        csv.writer(csvFile).writerows(rows)

    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", len(rows))

In [10]:
# time_range = pd.read_csv("Twitter_start_end_times.csv")
# start_list = time_range['start_time'].tolist()
# end_list = time_range['end_time'].tolist()

In [11]:
#Inputs for tweets
bearer_token = auth()
headers = create_headers(bearer_token)
keyword = "tesla lang:en -from:tesla -is:retweet"
# Collection windows: start_list[i] is paired with end_list[i]
start_list = ['2021-10-30T16:00:00.000Z', '2021-10-30T17:00:00.000Z', '2021-10-30T18:00:00.000Z']
end_list = ['2021-10-30T16:59:59.000Z', '2021-10-30T17:59:59.000Z', '2021-10-30T18:59:59.000Z']

# Tweets requested per API call
max_results = 100
# Max tweets per time period. Checked before each request, so the last page
# fetched for a period may push the count past this value.
max_count = 250
# Seconds to wait when pacing requests
pause_seconds = 5
# CSV file the tweets are appended to
output_file = "datatest.csv"

#Total number of tweets we collected from the loop
total_tweets = 0

# Create the file with its header row. The header is written only when the file
# is missing or empty, so re-running this cell does not insert duplicate header
# rows in the middle of the dataset.
if not os.path.exists(output_file) or os.path.getsize(output_file) == 0:
    with open(output_file, "a", newline="", encoding='utf-8') as csvFile:
        csvWriter = csv.writer(csvFile)
        #Create headers for the data you want to save, in this example, we only want save these columns in our dataset
        csvWriter.writerow(['created_at','id','tweet'])

for i in range(0,len(start_list)):

    # Inputs
    count = 0 # Counting tweets per time period
    flag = True
    next_token = None

    # Keep paging until the last page was seen or max_count is reached
    while flag and count < max_count:
        print("-------------------")
        print("Token: ", next_token)
        url = create_url(keyword, start_list[i], end_list[i], next_token)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        meta = json_response['meta']
        result_count = meta['result_count']

        # A next_token is present only while more pages remain; a missing (or
        # None) token marks the final request for this time period.
        next_token = meta.get('next_token')
        is_last_page = next_token is None
        if not is_last_page:
            # Token saved for the next call
            print("Next Token: ", next_token)

        if result_count is not None and result_count > 0:
            if is_last_page:
                print("-------------------")
            print("Start Date: ", start_list[i])
            append_to_csv(json_response, output_file)
            count += result_count
            total_tweets += result_count
            print("Total # of Tweets added: ", total_tweets)
            print("-------------------")
            time.sleep(pause_seconds)

        if is_last_page:
            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
        time.sleep(pause_seconds)
print("Total number of results: ", total_tweets)

-------------------
Token:  None
Endpoint Response Code: 200
Next Token:  b26v89c19zqg8o3fpdv7h4xq182dyyzss5hu7rvggo6m5
Start Date:  2021-10-30T16:00:00.000Z
# of Tweets added from this response:  99
Total # of Tweets added:  99
-------------------
-------------------
Token:  b26v89c19zqg8o3fpdv7h4xq182dyyzss5hu7rvggo6m5
Endpoint Response Code: 200
Next Token:  b26v89c19zqg8o3fpdv7h4xfky3hr82hhw6eocg8fx6yl
Start Date:  2021-10-30T16:00:00.000Z
# of Tweets added from this response:  99
Total # of Tweets added:  198
-------------------
-------------------
Token:  b26v89c19zqg8o3fpdv7h4xfky3hr82hhw6eocg8fx6yl
Endpoint Response Code: 200
Next Token:  b26v89c19zqg8o3fpdv7h4xfjdzddlbr5qg1dkgbx3drx
Start Date:  2021-10-30T16:00:00.000Z
# of Tweets added from this response:  100
Total # of Tweets added:  298
-------------------
-------------------
Token:  None
Endpoint Response Code: 200
Next Token:  b26v89c19zqg8o3fpdv7h6zrbs3n98byiow1m84w233p9
Start Date:  2021-10-30T17:00:00.000Z
# of Tweet