In [66]:
import os
import requests
import time

import pytz
from datetime import datetime, timezone, timedelta

import pandas as pd

### Helper Functions

In [2]:
# Paste the bearer token between the quotes the first time the notebook is
# used, run this cell, then blank the string again so the secret is never
# saved together with the notebook.
_pasted_token = ''

# Only write to the environment when something was actually pasted. An
# unconditional assignment would replace a valid TOKEN with '' on every
# re-run of this cell (e.g. Restart & Run All).
if _pasted_token:
    os.environ['TOKEN'] = _pasted_token

In [67]:
def auth():
    '''Return the value of the TOKEN environment variable (None when it is unset).'''
    token = os.environ.get("TOKEN")
    return token

def create_headers(bearer_token):
    '''Build the request headers: a single Authorization entry carrying the bearer token.'''
    return {"Authorization": f"Bearer {bearer_token}"}



def create_url(keyword, start_date, end_date, max_results = 10):
    '''
    Assemble the endpoint URL and the query parameters for a search/all request.

    Reference for the search/all API endpoint:
    https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all

    keyword     : search query string, sent as "query"
    start_date  : sent as "start_time"
    end_date    : sent as "end_time"
    max_results : sent as "max_results" (defaults to 10)

    Returns a (search_url, query_params) tuple.
    '''
    # Documentation for the individual parameters:
    #   "query"        : https://developer.twitter.com/en/docs/twitter-api/tweets/counts/integrate/build-a-query
    #   "tweet.fields" : https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/tweet
    #   "user.fields"  : https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/user
    #   "place.fields" : https://developer.twitter.com/en/docs/twitter-api/data-dictionary/object-model/place
    expansions = 'author_id,in_reply_to_user_id,geo.place_id'
    tweet_fields = 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source'
    user_fields = 'id,name,username,created_at,description,location,public_metrics,verified'
    place_fields = 'full_name,id,country,country_code,geo,name,place_type'

    query_params = {
        'query': keyword,
        'start_time': start_date,
        'end_time': end_date,
        'max_results': max_results,
        'expansions': expansions,
        'tweet.fields': tweet_fields,
        'user.fields': user_fields,
        'place.fields': place_fields,
        # Placeholder entry for the paging token (an empty dict, as before).
        'next_token': {},
    }

    search_url = "https://api.twitter.com/2/tweets/search/all"
    return (search_url, query_params)

def retweet_url(id, max_results):
    '''
    Assemble the URL and query parameters for the retweeted_by endpoint of one tweet.

    id          : tweet id interpolated into the endpoint path
    max_results : sent as "max_results"

    Returns an (endpoint_url, params) tuple.
    '''
    tweet_fields = 'attachments,author_id,context_annotations,conversation_id,created_at,entities,geo,id,in_reply_to_user_id,lang,non_public_metrics,public_metrics,organic_metrics,promoted_metrics,possibly_sensitive,referenced_tweets,reply_settings,source,text,withheld'
    user_fields = 'created_at,description,entities,id,location,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,withheld'

    params = {
        'expansions': 'pinned_tweet_id',
        'max_results': max_results,
        'tweet.fields': tweet_fields,
        'user.fields': user_fields,
        # Placeholder entry for the paging token (an empty dict, as before).
        'pagination_token': {},
    }

    endpoint = f"https://api.twitter.com/2/tweets/{id}/retweeted_by"
    return (endpoint, params)

### Main function 

In [68]:
def connect_to_endpoint(url, headers, params, next_token = None, timeout = 30):
    '''
    Send one GET request to the API and return the decoded JSON body.

    url        : endpoint URL
    headers    : request headers (carrying the bearer token)
    params     : query parameters; the dict passed in is left untouched
    next_token : paging token for the next page, or None for the first page
    timeout    : seconds handed to requests as the request timeout, so a
                 stalled connection raises instead of hanging the notebook

    Returns the parsed JSON response.
    Raises Exception(status_code, response_text) when the status is not 200.
    '''
    # Work on a copy: writing next_token into the caller's dict would leak
    # paging state from one call into the next use of the same dict.
    request_params = dict(params)
    request_params['next_token'] = next_token   # Used for searching the next pages

    # Make request to API
    response = requests.request(
        "GET",
        url,
        headers = headers,
        params = request_params,
        timeout = timeout,
    )

    # Check API response code
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

### Credential Declaration

In [69]:
# Initialise the Twitter API credentials: read the bearer token via auth()
# and wrap it in the request headers passed to the API calls.
# NOTE(review): nothing here checks that the token is non-empty — confirm
# TOKEN is set before running the request cells.
bearer_token = auth()
headers = create_headers(bearer_token)

### Query Dict Creation

In [74]:
# Search parameters
#
# Time window used for every keyword:
#   end   : 2022-03-01T00:00:00.000Z
#   start : 2017-01-01T00:00:00.000Z
#
# Hashtag list:
#   ["#นัดเย็ดกทม", "#onsกทม", "#fwbกทม", "#นัดเย็ดกรุงเทพ"]

keyword = 'นัดเย็ด กรุงเทพ OR นัดเย็ด กรุงเทพ. OR #นัดเย็ด #กรุงเทพ'
start_time = "2017-01-01T00:00:00.000Z"
end_time = "2022-03-01T00:00:00.000Z"
max_results = 500

# `url` holds the (endpoint URL, query parameter dict) pair returned by create_url.
url = create_url(
    keyword,
    start_date=start_time,
    end_date=end_time,
    max_results=max_results,
)

### Tweets Lookup

In [65]:
# Load ./data/origin_tweet_fwb.csv and discard its "Unnamed: 0" column
# (presumably an index column written by an earlier export — verify).
tweet_id_list = (
    pd.read_csv("./data/origin_tweet_fwb.csv")
    .drop(columns=["Unnamed: 0"])
)

### Make Request to API

In [75]:
# Each page is kept as its own frame and everything is concatenated once at
# the end. DataFrame.append (used here previously) was removed in pandas 2.0
# and re-copied every accumulated row on each page.
tweet_pages = []
user_pages = []

end_search_set = True

page_token = None
try:
    while end_search_set:
        # Make request to API
        json_response = connect_to_endpoint(url[0], headers, url[1], page_token)

        # Change json data into Pandas DataFrame.
        # .get() is used because a page without matches may omit the "data"
        # and "includes" keys; such a page becomes an empty frame instead of
        # aborting the whole crawl with a KeyError.
        df_tweet_temp = pd.DataFrame(json_response.get('data', []))
        user_data_temp = pd.DataFrame(json_response.get("includes", {}).get('users', []))

        # ------------ Display Records per pages ------------ #
        print("Pages token : ", page_token)
        print("Tweet number on current page : ", len(df_tweet_temp))
        print('---- *** ---- *** --- *** ---- *** --- ')
        print("")
        # ------------ Display Records per pages ------------ #

        # Keep the data from each request for the final concatenation
        tweet_pages.append(df_tweet_temp)
        user_pages.append(user_data_temp)

        if 'next_token' in json_response["meta"]:
            page_token = json_response["meta"]['next_token']
            # Pause between pages (presumably to stay under the API rate limit).
            time.sleep(5)
        else:
            end_search_set = False
finally:
    # Runs on failure as well, so the pages fetched before an API error are
    # still available in df_total_tweet / df_total_user. Row indices are kept
    # per page (no ignore_index), matching the previous append-based result.
    df_total_tweet = pd.concat(tweet_pages) if tweet_pages else pd.DataFrame()
    df_total_user = pd.concat(user_pages) if user_pages else pd.DataFrame()

Endpoint Response Code: 200
Pages token :  None
Tweet number on current page :  396
---- *** ---- *** --- *** ---- *** --- 

Endpoint Response Code: 200
Pages token :  b26v89c19zqg8o3fpe75b86w4hpekgzi45c97aybog2nx
Tweet number on current page :  436
---- *** ---- *** --- *** ---- *** --- 

Endpoint Response Code: 200
Pages token :  b26v89c19zqg8o3fpe46dvxohcxr7m7ml4h4x87005ail
Tweet number on current page :  291
---- *** ---- *** --- *** ---- *** --- 

Endpoint Response Code: 200
Pages token :  b26v89c19zqg8o3fpe18a9k72usy4h13uofs2xdhby6bh
Tweet number on current page :  252
---- *** ---- *** --- *** ---- *** --- 

Endpoint Response Code: 200
Pages token :  b26v89c19zqg8o3fpe170sprpasjja7fghhqd43vz4fp9
Tweet number on current page :  198
---- *** ---- *** --- *** ---- *** --- 

Endpoint Response Code: 200
Pages token :  b26v89c19zqg8o3fpdy8xts9xg0vrklqr7l7hr8kxk871
Tweet number on current page :  379
---- *** ---- *** --- *** ---- *** --- 

Endpoint Response Code: 200
Pages token :  b2

### Data Manipulation

In [None]:
# Display (rows, columns) of the combined tweet frame.
df_total_tweet.shape

(155584, 12)

In [None]:
# Display (rows, columns) of the combined user frame.
df_total_user.shape

(115352, 8)

In [None]:
def convert_datetime(dataframe:pd.DataFrame, datetime_column:str) -> pd.DataFrame:
    """
    Add parsed UTC and Asia/Bangkok date/time columns derived from a timestamp column.

    Six columns are added to ``dataframe`` IN PLACE, each named with
    ``datetime_column`` as prefix:

    * ``_dt``        parsed, timezone-aware timestamp in UTC
    * ``_time``      time-of-day of ``_dt``
    * ``_date``      calendar date of ``_dt``
    * ``_dt_thtz``   ``_dt`` converted to Asia/Bangkok
    * ``_time_thtz`` time-of-day of ``_dt_thtz``
    * ``_date_thtz`` calendar date of ``_dt_thtz``

    Parameters
    ----------
    dataframe : frame holding ``datetime_column``; it is modified in place.
    datetime_column : name of the column with the raw timestamps.

    Returns
    -------
    The same ``dataframe`` object, for call chaining.

    Raises
    ------
    KeyError if ``datetime_column`` is not a column of ``dataframe``.
    """
    my_timezone = "Asia/Bangkok"
    utc_col = datetime_column + "_dt"
    local_col = datetime_column + "_dt_thtz"

    # utc=True always yields a timezone-aware UTC column. Without it,
    # timestamps lacking an offset (or an empty column) parse as tz-naive and
    # the tz_convert below raises TypeError; such values are now read as UTC.
    dataframe[utc_col] = pd.to_datetime(dataframe[datetime_column], utc=True)
    dataframe[datetime_column + "_time"] = dataframe[utc_col].dt.time
    dataframe[datetime_column + "_date"] = dataframe[utc_col].dt.date

    dataframe[local_col] = dataframe[utc_col].dt.tz_convert(my_timezone)
    dataframe[datetime_column + "_time_thtz"] = dataframe[local_col].dt.time
    dataframe[datetime_column + "_date_thtz"] = dataframe[local_col].dt.date
    return dataframe

In [None]:
# Convert "created_at" in both frames: adds the parsed date/time columns,
# including the Thailand (Asia/Bangkok) timezone variants.
df_total_tweet, df_total_user = [
    convert_datetime(frame, "created_at")
    for frame in (df_total_tweet, df_total_user)
]

### Export to .csv format with UTF-8 encoding

In [None]:
# Each file is named "<keyword>_<latest timestamp>_<earliest timestamp>_<kind>.csv",
# with the timestamps taken from the Asia/Bangkok column of the frame being written.
# 'utf-8-sig' writes the file as UTF-8 with a leading byte-order mark.
tweet_stamps = df_total_tweet['created_at_dt_thtz']
tweet_csv_name = f"{keyword}_{str(tweet_stamps.max())}_{str(tweet_stamps.min())}_tweetdata.csv"
df_total_tweet.to_csv(tweet_csv_name, encoding='utf-8-sig')

user_stamps = df_total_user['created_at_dt_thtz']
user_csv_name = f"{keyword}_{str(user_stamps.max())}_{str(user_stamps.min())}_userdata.csv"
df_total_user.to_csv(user_csv_name, encoding='utf-8-sig')