In [1]:
import csv
import pandas as pd
import snscrape.modules.twitter as sntwitter

### Custom functions

In [27]:
def tweet2cell(tweet, lat, lon, dist, city, query):
    url = tweet.url
    date = tweet.date
    content = tweet.content
    # user = tweet.user.username
    # reply_cnt = tweet.replyCount
    # retweet_cnt = tweet.retweetCount
    # quote_cnt = tweet.quoteCount
    # like_cnt = tweet.likeCount
    # lang = tweet.lang
    # source = tweet.sourceLabel
    # media = tweet.media
    # mentioned = tweet.mentionedUsers
    # retweeted_tweet = tweet.retweetedTweet
    # quoted_tweet = tweet.quotedTweet
    
    '''
    cell = [date, content, lang, source,
            retweeted_tweet, quoted_tweet,
            reply_cnt, retweet_cnt, quote_cnt, like_cnt,
            media, mentioned, user, url,
            lat, lon, dist, f"near {city}", query]
    '''
    
    cell = [date, content, url,
            lat, lon, dist, f"near {city}", query]
    
    return cell

In [28]:
def get_tweets(maxTweets, terms, since, until, lat=None, lon=None, dist=None, city=None):
    # Info on queries: https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/guides/standard-operators
    if None not in (lat, lon, dist, city):
        query = f"{terms} until:{until} since:{since} geocode:{lat},{lon},{dist}"
    else:
        query = f"{terms} until:{until} since:{since}"
        
    tweets = []
    for i,tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
        tweet_cell = tweet2cell(tweet, lat, lon, dist, city, query)
        tweets.append(tweet_cell)
        if i > maxTweets:
            break
    
    if len(tweets)==0:
        return None
    
    df = pd.DataFrame(tweets)
    df.columns = ['date', 'content', 'url', 
                  'lat', 'lon', 'dist', 'city', 'query']
    
    return df

### Capital cities in Colombia

In [29]:
cities = pd.read_csv("../archivos/co_small.csv")
cities.head()

Unnamed: 0,city,lat,lng,country,iso2,admin,capital,population,population_proper
0,Bogotá,4.649178,-74.062827,Colombia,CO,Bogotá,primary,7772000.0,6333661.0
1,Medellín,6.25184,-75.563591,Colombia,CO,Antioquia,admin,3297000.0,1999979.0
2,Cali,3.437222,-76.5225,Colombia,CO,Valle del Cauca,admin,2254000.0,2178836.0
3,Barranquilla,10.963889,-74.796389,Colombia,CO,Atlántico,admin,1798000.0,1244491.0
4,Bucaramanga,7.125393,-73.119804,Colombia,CO,Santander,admin,1009000.0,571820.0


### Get data

In [30]:
maxTweets = 100
terms = "pandemia"

since = "2021-01-01"
until = "2021-03-01"

In [31]:
dist = "100mi"

dfs = []

for row in cities.iterrows():
    lat = row[1]['lat']
    lon = row[1]['lng']
    city = row[1]['city']
    
    df = get_tweets(maxTweets, terms, since, until, lat, lon, dist, city)
    
    if df is not None:
        dfs.append(df)
        print(f"Collected {df.shape[0]:,.0f} tweets from {city}")
    else:
        print(f"*** Collected 0 tweets from {city}")

Collected 102 tweets from Bogotá
Collected 102 tweets from Medellín
Collected 102 tweets from Cali
Collected 102 tweets from Barranquilla
Collected 102 tweets from Bucaramanga
Collected 102 tweets from Cartagena
Collected 102 tweets from Cúcuta
Collected 102 tweets from Pereira
Collected 102 tweets from Santa Marta
Collected 102 tweets from Ibagué
Collected 102 tweets from Pasto
Collected 102 tweets from Manizales
Collected 102 tweets from Villavicencio
Collected 102 tweets from Neiva
Collected 102 tweets from Armenia
Collected 102 tweets from Valledupar
Collected 102 tweets from Montería
Collected 102 tweets from Sincelejo
Collected 102 tweets from Popayán
Collected 102 tweets from Tunja
Collected 102 tweets from Ríohacha
Collected 102 tweets from Florencia
Collected 102 tweets from Quibdó
Collected 102 tweets from Arauca
Collected 102 tweets from Yopal
Collected 29 tweets from Leticia
Collected 8 tweets from San Andrés
Collected 102 tweets from San José del Guaviare
Collected 102 twe

In [32]:
dfs = pd.concat(dfs, ignore_index=True)
dfs = dfs.drop_duplicates(subset='url')

print(dfs.shape[0])

996


In [33]:
dfs.head()

Unnamed: 0,date,content,url,lat,lon,dist,city,query
0,2021-02-28 23:54:25+00:00,No sería raro que en dos semanas haya un nuevo...,https://twitter.com/AndresQuinte12/status/1366...,4.649178,-74.062827,100mi,near Bogotá,pandemia until:2021-03-01 since:2021-01-01 geo...
1,2021-02-28 23:47:58+00:00,Si fuera de Millos también estaría en mi casa ...,https://twitter.com/meprendorapidoo/status/136...,4.649178,-74.062827,100mi,near Bogotá,pandemia until:2021-03-01 since:2021-01-01 geo...
2,2021-02-28 23:47:37+00:00,Ayer fue de esos días que quería hacer una loc...,https://twitter.com/Danistefo/status/136617323...,4.649178,-74.062827,100mi,near Bogotá,pandemia until:2021-03-01 since:2021-01-01 geo...
3,2021-02-28 22:44:43+00:00,Chevere que empiecen a abrir espacios para ret...,https://twitter.com/juanmaquiroz/status/136615...,4.649178,-74.062827,100mi,near Bogotá,pandemia until:2021-03-01 since:2021-01-01 geo...
4,2021-02-28 22:06:31+00:00,"""Cuando el mundo luchaba contra una pandemia"" ...",https://twitter.com/MillosDColombia/status/136...,4.649178,-74.062827,100mi,near Bogotá,pandemia until:2021-03-01 since:2021-01-01 geo...


In [34]:
dfs.to_csv("pandemia.csv", index=False)