# Search Mode

In [None]:
import os
import tweepy
import csv
import pandas as pd

def get_tweet_data(tweet):
    """Flatten a status object into a dict holding one CSV row.

    Parameters
    ----------
    tweet : object
        Must expose ``entities`` (a mapping with a ``"hashtags"`` list of
        mappings that each have a ``"text"`` key), ``user``, ``created_at``,
        ``full_text`` and ``source``.

    Returns
    -------
    dict
        Author details plus the tweet's date, text, hashtags and source.
        ``'hashtags'`` is the list of hashtag texts, or ``None`` when the
        tweet carries no hashtags.
    """
    # Keep only the text of each hashtag entity.
    tag_names = [entry["text"] for entry in tweet.entities["hashtags"]]
    author = tweet.user

    return {
        'user_name': author.name,
        'user_location': author.location,
        'user_description': author.description,
        'user_created': author.created_at,
        'user_followers': author.followers_count,
        'user_friends': author.friends_count,
        'user_favourites': author.favourites_count,
        'user_verified': author.verified,
        'date': tweet.created_at,
        'full_text': tweet.full_text,
        # An empty list is stored as None rather than [].
        'hashtags': tag_names or None,
        'source': tweet.source,
    }

# Credentials are read from the environment so that no secrets live in the
# notebook; a missing variable raises KeyError here.
consumer_key = os.environ["CONSUMER_KEY"]
consumer_secret = os.environ["CONSUMER_SECRET"]

# Build the API client from the consumer key/secret pair.
# NOTE(review): `wait_on_rate_limit_notify` (and `api.search` below) look like
# tweepy 3.x names -- confirm the installed tweepy version before re-running.
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Define Consts
# Query string passed as `q`; the `-filter:retweets` operator is presumably
# meant to exclude retweets from the results.
search_words = "#covid19 -filter:retweets"
# Business days between the two dates, converted to `datetime.date` objects.
date_list = pd.bdate_range('2020-03-01', '2020-11-01').date
# CSV field order; also used as the column names when the file is read back.
columns = ['user_name','user_location', 'user_description', 'user_created', 'user_followers', 'user_friends',
          'user_favourites', 'user_verified', 'date', 'full_text', 'hashtags', 'source']

# The file is opened in append mode and no header row is written, so running
# this cell again adds more rows to whatever is already in the file.
with open(r'covid_tweets.csv', 'a', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=columns)
    # Pair each date with the next one in the list to form a (since, until) window.
    for since, until in zip(date_list, date_list[1:]):
        # `.items(10)` caps the cursor at 10 tweets per window.
        for tweet in tweepy.Cursor(api.search,
                               q=search_words,
                               lang="en",
                               since=since,
                               until = until,
                               tweet_mode='extended').items(10):
            row_info = get_tweet_data(tweet)
            writer.writerow(row_info)

In [None]:
# The collection loop writes rows without a header line, so load the file
# headerless and attach the field names from the notebook-level `columns` list.
df = pd.read_csv('covid_tweets.csv', header=None)
df.columns = columns
df

# Download by ID

In [None]:
import os
import tweepy
import csv
import pandas as pd
from tweet import config


def get_tweet_data(tweet_id):
    """Look up a tweet by ID and flatten it into a dict holding one CSV row.

    The lookup goes through the notebook-level ``api`` client, requesting the
    extended tweet mode. When the fetched status has a ``retweeted_status``
    attribute, the row describes that retweeted status instead of the retweet.

    Parameters
    ----------
    tweet_id
        Identifier handed to ``api.get_status``.

    Returns
    -------
    dict
        Author details plus the tweet's date, text, hashtags and source.
        ``'hashtags'`` is the list of hashtag texts, or ``None`` when there
        are none.
    """
    fetched = api.get_status(tweet_id, tweet_mode='extended')

    # Fall back to the fetched status itself when it is not a retweet.
    status = getattr(fetched, "retweeted_status", fetched)

    # Keep only the text of each hashtag entity.
    tag_names = [entry["text"] for entry in status.entities["hashtags"]]
    author = status.user

    return {
        'user_name': author.name,
        'user_location': author.location,
        'user_description': author.description,
        'user_created': author.created_at,
        'user_followers': author.followers_count,
        'user_friends': author.friends_count,
        'user_favourites': author.favourites_count,
        'user_verified': author.verified,
        'date': status.created_at,
        'full_text': status.full_text,
        # An empty list is stored as None rather than [].
        'hashtags': tag_names or None,
        'source': status.source,
    }


# Credentials are read from the environment so that no secrets live in the
# notebook; a missing variable raises KeyError here.
consumer_key = os.environ["CONSUMER_KEY"]
consumer_secret = os.environ["CONSUMER_SECRET"]

# Build the API client used by get_tweet_data().
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Define dirs
save_dir = config.data / 'covid19_id_reduced'
# Sorted directory listing; only the last entry is processed below.
f_list = sorted(save_dir.iterdir())

# CSV field order for the rows appended to covid_tweets.csv.
columns = ['user_name','user_location', 'user_description', 'user_created', 'user_followers', 'user_friends',
          'user_favourites', 'user_verified', 'date', 'full_text', 'hashtags', 'source']

# The file is opened in append mode and no header row is written, so running
# this cell again adds more rows to whatever is already in the file.
with open(r'covid_tweets.csv', 'a', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=columns)
    for date_df in f_list[-1:]:
        # Parse each ID file once and derive both values from the same frame
        # (the file used to be read from disk twice).
        id_frame = pd.read_csv(date_df)
        # The first column label is kept as `date`; the cell values are the IDs.
        date = id_frame.columns[0]
        IDs = id_frame.values.flatten()
        # Only the first 10 IDs of the file are fetched.
        for tweet_id in IDs[:10]:
            try:
                row_info = get_tweet_data(tweet_id)
                writer.writerow(row_info)
            except Exception as e:
                # Best effort on purpose: report the failing ID and carry on
                # with the next one instead of aborting the whole download.
                print(tweet_id, ":", e)

In [None]:
# Print the character length of every stored tweet text. The file is read
# without headers, so column 9 is the tenth field of `columns`: 'full_text'.
df = pd.read_csv('covid_tweets.csv', header=None)
for t in df[9]:
    print(len(t))

In [None]:
# Show the text of the most recent row built by the download loop. This relies
# on `row_info` still being defined in the kernel from that cell.
row_info['full_text']

# Tests

In [10]:
import os
import tweepy
import csv
import pandas as pd
from tweet import config

# Credentials are read from the environment; a missing variable raises KeyError.
consumer_key = os.environ["CONSUMER_KEY"]
consumer_secret = os.environ["CONSUMER_SECRET"]

auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Fetch one hard-coded status in extended mode and display the raw object so
# its fields can be inspected.
tweet_id = '1219765513514930176'
tweet = api.get_status(tweet_id, tweet_mode='extended')
tweet

Status(_api=<tweepy.api.API object at 0x7fe61738df40>, _json={'created_at': 'Tue Jan 21 23:35:35 +0000 2020', 'id': 1219765513514930176, 'id_str': '1219765513514930176', 'full_text': 'RT @nikki_miumiu: #CDC has confirmed the first #US case of #WuhanPneumonia, the patient is now under observation in Washington state. This…', 'truncated': False, 'display_text_range': [0, 139], 'entities': {'hashtags': [{'text': 'CDC', 'indices': [18, 22]}, {'text': 'US', 'indices': [47, 50]}, {'text': 'WuhanPneumonia', 'indices': [59, 74]}], 'symbols': [], 'user_mentions': [{'screen_name': 'nikki_miumiu', 'name': 'Nikki😷', 'id': 1162111357, 'id_str': '1162111357', 'indices': [3, 16]}], 'urls': []}, 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 1202169343250821120, 'id_str': '12021

In [6]:
# Count the distinct values in column 12 of the headerless ID-subset CSV.
# NOTE(review): column 12 presumably holds the tweet language code (the
# recorded output lists only 'en') -- confirm against the file's schema.
save_path = config.data / 'covid19_tweets_id_subset.csv'
df = pd.read_csv(save_path, header=None)
df[12].value_counts()

en    83
Name: 12, dtype: int64

In [16]:
# Favorite count of the status that `tweet` retweets. This relies on `tweet`
# from the fetch cell in this section and raises AttributeError if that status
# has no `retweeted_status` attribute.
tweet.retweeted_status.favorite_count

64