In [36]:
# created on Dec 24, 2020
# modified on April 12, 2022
# @author:          Bo Zhao
# @email:           zhaobo@uw.edu
# @website:         https://hgis.uw.edu
# @organization:    Department of Geography, University of Washington, Seattle
# @description:     Stream geo-tagged tweets in real time within a bounding box (e.g., the U.S.). This script is adapted from https://github.com/shawn-terryah/Twitter_Geolocation

In [37]:
# Standard library
import json
import time

# Third-party
import pandas as pd
import tweepy

# Colab helpers: `files` downloads a file to the local computer and
# `drive` gives access to Google Drive.
from google.colab import drive, files

# Mount Google Drive into the Colab VM so the output CSV can be saved there.
drive.mount('/gdrive')

Mounted at /gdrive


In [38]:
class StreamListener(tweepy.StreamListener):
    """Collect geo-tagged tweets from the Twitter Streaming API for a fixed time.

    Every incoming tweet that carries a location (exact coordinates, or a
    place bounding box whose centre is used instead) is printed and buffered
    in ``self.result``. The first message that arrives after ``time_limit``
    seconds causes the buffer to be written to ``file`` as CSV, downloaded
    through ``google.colab.files`` and the stream to be stopped.
    """

    # Column order of the CSV; passing it explicitly guarantees a header row
    # even when no geo-tagged tweet was collected.
    _COLUMNS = ['id', 'username', 'created_at', 'lng', 'lat', 'text']

    def __init__(self, time_limit=60, file=""):
        """Initialise the listener.

        Args:
            time_limit: Seconds to keep collecting, counted from the moment
                this object is constructed.
            file: Path of the CSV file written once the time limit is reached.
        """
        self.start_time = time.time()
        self.limit = time_limit
        self.result = []
        self.f = file
        super(StreamListener, self).__init__()

    @staticmethod
    def _extract_location(datajson):
        """Return ``(lng, lat)`` for a decoded tweet, or ``None`` if it has no usable geo-tag.

        Exact point coordinates are preferred. Otherwise the centre of the
        place bounding box is used, computed from the extreme vertex values so
        the result does not depend on the order in which vertices are listed.
        """
        point = datajson.get('coordinates')
        if point is not None:
            return point['coordinates'][0], point['coordinates'][1]

        try:
            bbox = datajson['place']['bounding_box']['coordinates'][0]
            lngs = [vertex[0] for vertex in bbox]
            lats = [vertex[1] for vertex in bbox]
            return (min(lngs) + max(lngs)) / 2.0, (min(lats) + max(lats)) / 2.0
        except (KeyError, IndexError, TypeError, ValueError):
            # 'place' missing or null, or a malformed/empty bounding box:
            # the tweet simply has no location we can use.
            return None

    def _save_and_download(self):
        """Write the buffered rows to ``self.f`` as CSV and download the file."""
        df = pd.DataFrame(self.result, columns=self._COLUMNS)
        df.to_csv(self.f, index=False)
        # download the csv to your local computer
        files.download(self.f)
        print("the csv has been downloaded to your local computer. The program has been completed successfully.")

    def on_data(self, data):
        """Handle one raw message from the stream.

        Args:
            data: JSON-encoded message as delivered by tweepy.

        Returns:
            ``False`` once the time limit has elapsed (after saving the CSV),
            which tells tweepy to stop the stream; ``True`` otherwise.
        """
        if (time.time() - self.start_time) >= self.limit:
            self._save_and_download()
            return False

        datajson = json.loads(data)
        if 'id' not in datajson:
            # Message without a tweet id -- presumably a stream notice rather
            # than a tweet (TODO confirm). Pause before handling more data.
            time.sleep(10)
            return True

        location = self._extract_location(datajson)
        if location is None:
            # Not geo-tagged: nothing to record.
            return True

        lng, lat = location
        row = {
            'id': datajson['id'],
            'username': datajson['user']['screen_name'],
            'created_at': datajson['created_at'],
            'lng': lng,
            'lat': lat,
            'text': datajson['text'].strip().replace("\n", "")
        }
        print(row)
        self.result.append(row)
        return True

In [44]:
if __name__ == "__main__":
    import os  # local import: only needed here to read credentials from the environment

    # Path on the mounted Google Drive where the collected tweets are saved as CSV.
    output_file = '/gdrive/My Drive/geotweets.csv'

    # Twitter API credentials. Apply for your own keys at
    # https://developer.twitter.com/en/apply-for-access, then register a Twitter App
    # to get the keys and access tokens.
    # Prefer setting the environment variables below so that secrets are never saved
    # inside the notebook; the "###" placeholders are used only when a variable is unset.
    consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "###")
    consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "###")
    access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "###")
    access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "###")

    myauth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    myauth.set_access_token(access_token, access_token_secret)

    # LOCATIONS are the longitude, latitude coordinate corners for a box that restricts the
    # geographic area from which you will stream tweets. The first two define the southwest
    # corner of the box and the second two define the northeast corner of the box.
    LOCATIONS = [-13.863504, 35.770642, 20.058751, 70.263953] #Western Europe (Iberia, British Isles, Italy, Germany, Low Countries, Norway and Sweden)

    # Collect for 600 seconds, then save the CSV to output_file and stop.
    stream_listener = StreamListener(time_limit=600, file=output_file)
    stream = tweepy.Stream(auth=myauth, listener=stream_listener)
    stream.filter(locations=LOCATIONS, languages=['en'], encoding="utf-8")

{'id': 1517711421311791106, 'username': 'JasonUnsworth__', 'created_at': 'Sat Apr 23 03:46:15 +0000 2022', 'lng': 0.15664, 'lat': 52.67074, 'text': 'Just posted a video @ Wisbech https://t.co/dkcHYXQ4Wh'}
{'id': 1517711466937393152, 'username': 'HerkAlex', 'created_at': 'Sat Apr 23 03:46:26 +0000 2022', 'lng': 4.3063955, 'lat': 52.076868000000005, 'text': '@RonSexsmith ‘The Changing of the Guards’ is probably my favourite Dylan song and always in my head. 🔥'}
{'id': 1517711477272100864, 'username': 'BStarr_2', 'created_at': 'Sat Apr 23 03:46:28 +0000 2022', 'lng': 8.636605500000002, 'lat': 50.1212355, 'text': 'Drought 2 Wayne 🔥 https://t.co/959ZONpVzF'}
{'id': 1517711534180511745, 'username': 'swepal2', 'created_at': 'Sat Apr 23 03:46:42 +0000 2022', 'lng': 17.980058999999997, 'lat': 59.333670999999995, 'text': '@Oscar_R_Geeho @phylmurphy @retiredjudean @traseas7 @dezi555 @corjoy @Desuetudine @emmettimanu @niesdutz… https://t.co/GLEFtyH8cU'}
{'id': 1517711556468953088, 'username': 'amj

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

the csv has been downloaded to your local computer. The program has been completed successfully.


In [43]:
    # One bounding box per region: southwest corner (lng, lat) followed by
    # northeast corner (lng, lat). The boxes are concatenated into one flat list.
    CONTIGUOUS_US = [-124.7771694, 24.520833, -66.947028, 49.384472]
    ALASKA = [-164.639405, 58.806859, -144.152365, 71.76871]
    HAWAII = [-160.161542, 18.776344, -154.641396, 22.878623]
    LOCATIONS = CONTIGUOUS_US + ALASKA + HAWAII

    # NOTE: this cell relies on `output_file` and `myauth` created by the cell
    # guarded with `if __name__ == "__main__":`, so that cell must be run first.
    # It writes to the same output_file as that cell.
    stream_listener = StreamListener(file=output_file, time_limit=600)
    stream = tweepy.Stream(listener=stream_listener, auth=myauth)
    stream.filter(languages=['en'], locations=LOCATIONS, encoding="utf-8")

{'id': 1517702589470625799, 'username': 'K_I_M_____4eva', 'created_at': 'Sat Apr 23 03:11:09 +0000 2022', 'lng': -74.11787849999999, 'lat': 40.9057975, 'text': 'It’s just something about fresh fried chicken cooked in fresh grease that make me always want some😂😂'}
{'id': 1517702589365764096, 'username': 'CynthiaWalsom', 'created_at': 'Sat Apr 23 03:11:09 +0000 2022', 'lng': -92.0611765, 'lat': 30.206079000000003, 'text': '@KevinSixx13 @WHO @ReutersFacts @ReutersUS @dmca1dkr8r @usafhc @mcposf @chiefstonefox @SavetheChildren @UNDRIP21… https://t.co/KwB6xlbtOM'}
{'id': 1517702590468747264, 'username': 'T0TALC0NFUSI0N', 'created_at': 'Sat Apr 23 03:11:09 +0000 2022', 'lng': -118.4119065, 'lat': 34.0207895, 'text': 'Chase Shakur’s music speaks to the soul.'}
{'id': 1517702592159272960, 'username': 'AllegedlyJoshie', 'created_at': 'Sat Apr 23 03:11:10 +0000 2022', 'lng': -84.433106, 'lat': 33.7671945, 'text': 'Pure. Unadulterated. Joy.'}
{'id': 1517702595233689600, 'username': 'matthewlaing',

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

the csv has been downloaded to your local computer. The program has been completed successfully.
