In [1]:
## Import Library
import json
import os
import time
from dataclasses import dataclass, field
from datetime import datetime as dt
from typing import NamedTuple
import logging

import urllib3
import config

import pandas as pd

In [2]:
## Authentication
# Endpoint that request_tweets() sends every GET request to.
URL  = 'https://api.twitter.com/2/tweets/search/all'
# Bearer token taken from the local `config` module; it is placed in the
# `Authorization: Bearer ...` header by request_tweets().
KEY = config.BEARER_TOKEN

In [3]:
@dataclass
class ResponseDataSet:
    """Container with three independent lists of dicts.

    Every field defaults to a fresh empty list per instance (``default_factory``),
    so instances never share list objects.
    NOTE(review): nothing in the visible cells instantiates this class; the
    field meanings below are inferred from names and the inline example —
    confirm against the code that fills it.
    """
    # Example element: {"id": "1417073041876553729","created_at": "2021-07-19T10:45:15.000Z","text":"hoge"}
    data: list[dict] = field(default_factory=list)
    # presumably user objects from the response — TODO confirm
    users: list[dict] = field(default_factory=list) 
    # presumably referenced tweet objects from the response — TODO confirm
    tweets: list[dict] = field(default_factory=list)

In [4]:
class RequestItems(NamedTuple):
    """Everything request_tweets() needs to issue one search request."""
    # Connection pool whose .request() method performs the HTTP call.
    http: urllib3.PoolManager
    # Bearer token; sent as 'Authorization: Bearer <key>'.
    key: str
    # Query parameters, passed to the pool's request() as `fields`.
    # get_tweets() writes 'next_token' into this dict while paginating.
    params: dict

In [5]:
def request_tweets(ritems: RequestItems):
    """Issue a single GET request to ``URL`` and return the response object.

    The bearer token in ``ritems.key`` is sent in the Authorization header and
    ``ritems.params`` is passed as the request fields.
    """
    auth_headers = {'Authorization': 'Bearer ' + ritems.key}
    pool = ritems.http
    return pool.request(
        'GET',
        URL,
        fields=ritems.params,
        headers=auth_headers,
    )

In [6]:
def simple_get_tweets(ritems: RequestItems):
    """Perform one request via request_tweets() and hand back its response unchanged."""
    return request_tweets(ritems)

In [7]:
# Configure the root logger at DEBUG level. At this level urllib3's
# connectionpool logger also prints the full request URL of every call
# (visible in the captured output of the download cell).
logging.basicConfig(level=logging.DEBUG)
# Emit one INFO record to confirm that logging is active.
logging.info('info')

INFO:root:info


In [8]:

def get_tweets(ritems:RequestItems, next_token:'str | None', save_folder:str,
              times=1, max_retry=10):
    """Send up to ``times`` search requests and save every page as a JSON file.

    Pagination follows ``meta.next_token`` of each response. The token is
    written into ``ritems.params['next_token']`` (the caller's dict is
    mutated), so successive requests continue where the previous one ended.

    Args:
        ritems: Connection pool, bearer token and query parameters.
        next_token: Token returned by an earlier call; when truthy it is
            applied to ``ritems.params`` so the download resumes from there.
            Pass ``None`` or ``''`` to use ``ritems.params`` as given.
        save_folder: Directory for the JSON files; created (including
            parents) when missing.
        times: Maximum number of pages to fetch.
        max_retry: Maximum number of retries for a single request that is
            answered with HTTP 429.

    Returns:
        ``(res, next_token)``: the last response object (``None`` when
        ``times`` is 0) and the token for the following page, or ``None``
        when the last fetched page carried no ``next_token``.

    Raises:
        MaxRetryError: a request was still answered with 429 after
            ``max_retry`` retries.
        Exception: a response status other than 200/429 was received.
    """
    retry_wait_unit = 10  # seconds; the wait grows linearly with the retry number
    page_interval = 20    # seconds to pause after each saved page

    os.makedirs(save_folder, exist_ok=True)

    # Resume from a token handed in by the caller.
    if next_token:
        ritems.params['next_token'] = next_token

    res = None
    for i in range(times):
        # Re-send the same request while the API reports rate limiting,
        # instead of trying to parse the 429 error body as a result page.
        retry_count = 0
        while True:
            res = simple_get_tweets(ritems)
            logging.info(f'{i+1}回目{res.status}')
            if res.status != 429:
                break
            if retry_count >= max_retry:
                raise MaxRetryError('リトライ上限に達しました')
            retry_count += 1
            logging.info(f'{retry_count}回目のリトライ')
            time.sleep(retry_count * retry_wait_unit)

        if res.status != 200:
            logging.error('%s %s', res.headers, res.reason)
            raise Exception('エラーが発生しました')

        res_data = json.loads(res.data)

        # Log the time range covered by this page; a page without results
        # has no 'data' entry at all.
        page_tweets = res_data.get('data') or []
        if page_tweets:
            logging.info(f"start_{page_tweets[0].get('created_at')},end_{page_tweets[-1].get('created_at')}")
        else:
            logging.info('ツイートが0件でした')

        _save_response_json(res_data, save_folder)

        # The metadata carries the token of the following page; its absence
        # means the last page has been reached.
        next_token = res_data.get('meta', {}).get('next_token')
        if not next_token:
            next_token = None
            break
        ritems.params['next_token'] = next_token

        # Pause so that consecutive requests stay below the rate limit.
        time.sleep(page_interval)

    return res, next_token


def _save_response_json(res_data, save_folder):
    """Write one decoded response to ``<save_folder>/<mmdd_HHMMSS>.json`` as UTF-8."""
    file_name = f'{dt.now().strftime("%m%d_%H%M%S")}.json'
    # ensure_ascii=False emits raw non-ASCII text, so the encoding is fixed
    # explicitly rather than left to the platform default.
    with open(os.path.join(save_folder, file_name), mode='w', encoding='utf-8') as f:
        json.dump(res_data, f, ensure_ascii=False, sort_keys=True)
        f.write('\n')

class MaxRetryError(Exception):
    """Raised when the retry limit for rate-limited (HTTP 429) requests is reached."""


In [10]:
# Keep a pagination token that is already defined in the kernel; otherwise
# start with an empty one.
globals().setdefault('next_token', '')

http = urllib3.PoolManager()

# Query parameters sent with every search request.
params = {
    'query': '夫婦別姓 -is:verified',
    'max_results': 500,
    'start_time': '2022-08-01T00:00:00Z',
    'end_time': '2022-08-17T03:40:49Z',
    'tweet.fields': 'author_id,conversation_id,created_at,id,in_reply_to_user_id,entities,public_metrics,referenced_tweets',
    'user.fields': 'created_at,description,id,name,public_metrics,url,username,verified',
    'expansions': 'author_id,entities.mentions.username,in_reply_to_user_id,referenced_tweets.id,referenced_tweets.id.author_id',
}

ritems = RequestItems(http, KEY, params)

# Fetch up to 200 pages and store each one as a JSON file in the target folder.
res, next_token = get_tweets(
    ritems=ritems,
    next_token=next_token,
    times=200,
    save_folder='/Users/yuikondo/Thesis/data/august/notverified',
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.twitter.com:443
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id HTTP/1.1" 200 818547
INFO:root:1回目200
INFO:root:start_2022-08-17T03:40:23.000Z,end_2022-08-16T23:02:43.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&twe

DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5nhju7p2pihjkkes1obxz8dlpj1 HTTP/1.1" 200 840066
INFO:root:12回目200
INFO:root:start_2022-08-13T13:28:30.000Z,end_2022-08-13T10:57:44.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconv

INFO:root:23回目200
INFO:root:start_2022-08-12T04:53:38.000Z,end_2022-08-12T01:21:17.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5n2msohkzgw0r62zzqywcflrla5 HTTP/1.1" 200 852898
INFO:root:24回目200
INFO:root:start_2022-08-12T01:21:16.000Z,end_2022-08-11T21:00:36.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=

INFO:root:32回目200
INFO:root:start_2022-08-10T21:43:01.000Z,end_2022-08-10T15:26:50.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5n2c4qwx96yalukss8purughwfx HTTP/1.1" 200 906237
INFO:root:33回目200
INFO:root:start_2022-08-10T15:26:37.000Z,end_2022-08-10T11:20:57.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=

INFO:root:start_2022-08-09T01:06:29.000Z,end_2022-08-08T21:00:29.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5mnf0idz75hb9036le8lib0ulx9 HTTP/1.1" 200 810837
INFO:root:44回目200
INFO:root:start_2022-08-08T21:00:29.000Z,end_2022-08-08T14:56:23.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00

DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5mn4drw2a9j2ind4rbr34rc7jwd HTTP/1.1" 200 929223
INFO:root:55回目200
INFO:root:start_2022-08-07T13:41:34.000Z,end_2022-08-07T11:03:48.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconv

INFO:root:66回目200
INFO:root:start_2022-08-05T14:14:34.000Z,end_2022-08-05T11:58:36.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5m854ifd3o0nljgtpr6dqtslail HTTP/1.1" 200 841130
INFO:root:67回目200
INFO:root:start_2022-08-05T11:58:25.000Z,end_2022-08-05T08:44:42.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=

INFO:root:start_2022-08-04T03:03:20.000Z,end_2022-08-04T01:43:23.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5m7uga8dzjx2kfag58jewh673p9 HTTP/1.1" 200 701045
INFO:root:78回目200
INFO:root:start_2022-08-04T01:43:16.000Z,end_2022-08-04T00:44:01.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00

DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconversation_id%2Ccreated_at%2Cid%2Cin_reply_to_user_id%2Centities%2Cpublic_metrics%2Creferenced_tweets&user.fields=created_at%2Cdescription%2Cid%2Cname%2Cpublic_metrics%2Curl%2Cusername%2Cverified&expansions=author_id%2Centities.mentions.username%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id&next_token=b26v89c19zqg8o3fpz5lsxefzcxx5jarlw7g0knnl2kcd HTTP/1.1" 200 772466
INFO:root:89回目200
INFO:root:start_2022-08-02T12:31:08.000Z,end_2022-08-02T10:54:22.000Z
DEBUG:urllib3.connectionpool:https://api.twitter.com:443 "GET /2/tweets/search/all?query=%E5%A4%AB%E5%A9%A6%E5%88%A5%E5%A7%93+-is%3Averified&max_results=500&start_time=2022-08-01T00%3A00%3A00Z&end_time=2022-08-17T03%3A40%3A49Z&tweet.fields=author_id%2Cconv