In [24]:
import pandas as pd
import numpy as np

import snscrape.modules.twitter as sntwitter


## Scraping Twitter with Multiple Queries Using Snscrape

In [25]:
# Queries use the operator syntax of Twitter's 'Advanced Search'.
# min_faves is set to 3 to filter out most spam tweets.

min_date = '2015-01-01'
max_date = '2023-04-15'
min_faves = 3

# (tweets to keep per phrase, search phrases) for every language, in scraping order.
# Each phrase is searched as an exact match (it is wrapped in double quotes below).
search_phrases = [
    # English
    (250000, ['electric car', 'electric cars', 'electric vehicle',
              'electric vehicles', 'charging station', 'charging stations']),
    # Spanish ('coche eléctricor' was a typo for 'coche eléctrico')
    (100000, ['coche eléctrico', 'coches eléctricos',
              'estación de carga', 'estaciones de carga']),
    # French
    (100000, ['voiture électrique', 'voitures électriques',
              'station de charge', 'stations de recharge']),
    # Italian
    (100000, ['auto elettrica', 'auto elettriche',
              'stazione di ricarica', 'stazioni di ricarica']),
    # German
    (100000, ['Elektroauto', 'Elektroautos', 'Ladestation', 'Ladestationen']),
    # Portuguese ('cestação de carga' was a typo for 'estação de carga')
    (100000, ['carro eléctrico', 'automóveis eléctricos',
              'estação de carga', 'estações de carregamento']),
    # Chinese
    (100000, ['电子车', '电动车', '充电站']),
    # Russian
    (25000, ['электромобиль', 'электромобили',
             'зарядная станция', 'зарядные станции']),
    # Japanese (the phrase '電気自動車' was listed twice; a dict keeps only one
    # entry per key, so the duplicate is dropped)
    (25000, ['電気自動車', 'チャージングステーション', '充電スタンド']),
    # Korean
    (25000, ['전기 자동차', '전기차', '충전 스테이션', '충전소']),
    # Dutch
    (25000, ['elektrische auto', 'elektrische autos', 'laadstation', 'laadstations']),
]

# Maps each complete search query to the maximum number of tweets to collect for it.
# -filter:links excludes tweets that contain links.
n_query_dict = {
    f'"{phrase}" min_faves:{min_faves} until:{max_date} since:{min_date} -filter:links': limit
    for limit, phrases in search_phrases
    for phrase in phrases
}

# Translations of "news" were obtained from https://www.deepl.com/translator.
# The keywords are stored lower-case and without surrounding whitespace because
# they are matched as substrings of the lower-cased account handle below; the
# previous leading space and the capitalised 'Nachrichten' could never match.
# NOTE(review): Twitter handles are normally ASCII-only, so the non-Latin
# keywords will probably only be effective if the display name is checked
# instead — confirm which field was intended.
news_languages = ['news', 'noticias', 'nouvelles', 'notizie',
                  'nachrichten', 'notícias', '新闻', 'новости', 'ニュース', '뉴스', 'nieuws']

# One list of tweet rows per query, in the order of n_query_dict.
all_tweets = []

for query, query_limit in n_query_dict.items():
    query_limit = int(query_limit)
    tweets = []
    for tweet in sntwitter.TwitterSearchScraper(query).get_items():
        # Stop once enough matching tweets have been collected for this query.
        if len(tweets) >= query_limit:
            break

        # Geo-coordinates are missing for most tweets; only geo-tagged tweets are kept.
        if tweet.coordinates is None:
            continue

        # Skip accounts whose handle contains a "news" keyword in any language.
        # The earlier check `any(news not in username ...)` was true as soon as a
        # single keyword was absent, so it never excluded anything.
        username = str(tweet.user.username).lower()
        if any(keyword in username for keyword in news_languages):
            continue

        tweets.append([tweet.id, tweet.date, tweet.coordinates, tweet.replyCount,
                       tweet.retweetCount, tweet.likeCount, tweet.lang, tweet.rawContent])
    all_tweets.append(tweets)

# Flatten the per-query lists into a single list of tweet rows.
lists = [row for query_tweets in all_tweets for row in query_tweets]

# Build the frame directly from the rows. The previous NumPy round trip
# (np.concatenate over a single np.array) added nothing, forced every column to
# the generic object dtype, and produced a 1-D array that did not fit the eight
# column names when no tweets were collected. Constructing from the list lets
# pandas infer per-column dtypes and yields an empty frame with the right
# columns when `lists` is empty.
df = pd.DataFrame(lists, columns=['id', 'date', 'coordinates', 'number_of_replies',
                  'number_of_retweets', 'number_of_likes', 'language', 'raw_content'])


display(df)


Unnamed: 0,id,date,coordinates,number_of_replies,number_of_retweets,number_of_likes,language,raw_content
0,1646333442651717633,2023-04-13 02:04:14+00:00,"Coordinates(longitude=-118.0632981, latitude=3...",0,0,5,en,@SMHatLibs I noticed that electric car quit in...
1,1646044706776793088,2023-04-12 06:56:54+00:00,"Coordinates(longitude=-3.9526026, latitude=40....",1,0,5,en,Many of you ask me about my experience with my...
2,1645731911191461894,2023-04-11 10:13:58+00:00,"Coordinates(longitude=-7.66085699743185, latit...",1,0,5,en,@AutomotiveDia It's pennies mate\n\n£600 rent/...
3,1645665048436805632,2023-04-11 05:48:17+00:00,"Coordinates(longitude=152.668522848, latitude=...",0,3,14,en,@CarExpertAus Dreaming here as electric cars ...
4,1645150581638438914,2023-04-09 19:43:58+00:00,"Coordinates(longitude=-83.67529, latitude=36.5...",0,0,3,en,@airat618150 @metadriveapp Looking good! Yeah...
...,...,...,...,...,...,...,...,...
8889,1306479802925318145,2020-09-17 06:27:12+00:00,"Coordinates(longitude=4.1723759, latitude=50.6...",1,0,2,nl,@iOnAsJ Wij hebben een 30tal H2-wagens rondrij...
8890,1607705706701983746,2022-12-27 11:51:24+00:00,"Coordinates(longitude=0.3406128, latitude=44.6...",3,0,9,nl,@mbchamstra @IonityChargers @ENGIEpartFR 1) He...
8891,1476542724983644165,2021-12-30 13:16:33+00:00,"Coordinates(longitude=7.4924658, latitude=50.5...",1,0,3,da,@martingrefte @matthijsklaver @AllegoCharging ...
8892,1427967707471925254,2021-08-18 12:16:46+00:00,"Coordinates(longitude=6.4645947, latitude=51.8...",5,0,9,nl,@pvmeekeren @Fastned Het pijnpunt zit hem in h...


In [26]:
# Persist the raw scraped tweets so that later steps can reload them without
# scraping again.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run as-is where this directory exists.
raw_tweets_pickle_path = '/Users/okankoklu/Desktop/EXETER/Year 3/BEM3064 Data Viz/DATAVIZ PROJECT/raw_tweets_dataframe.pkl'
df.to_pickle(raw_tweets_pickle_path)