Documentation: https://pypi.org/project/twint/

In [4]:
import pandas as pd
import numpy as np
import csv
from collections import Counter

In [2]:
# Read in 100 users per side
influential_csv = pd.read_csv('influential_tweeters.csv')

In [3]:
influential_csv.head(50)

Unnamed: 0,affirming,denying
0,algore,skepticscience
1,oxfamgb,ScienceNotDogma
2,globalactplan,Carbongate
3,GreenpeaceUK,Neets2245
4,WWF_Climate,Co2Green
5,OneClimate,thegwpfcom
6,Ed_Miliband,CO2Coalition
7,CarolineLucas,Action4Life_
8,DECCgovuk,fredpalmercoal
9,the_ecologist,NoTricksZone


In [4]:
influential_csv.shape

(99, 2)

In [5]:
print(len(np.unique(influential_csv['denying'])))
print(len(np.unique(influential_csv['affirming'])))

96
96


In [6]:
print(len(influential_csv['denying']))
print(len(influential_csv['affirming']))

99
99


In [11]:
[x for x in Counter(influential_csv['denying']) if Counter(influential_csv['denying'])[x] > 1]

[]

In [12]:
[x for x in Counter(influential_csv['affirming']) if Counter(influential_csv['affirming'])[x] > 1]

[]

In [9]:
influential_csv['denying'].drop_duplicates(keep='first',inplace=True)
influential_csv['affirming'].drop_duplicates(keep='first',inplace=True)

In [10]:
influential_csv.shape

(99, 2)

Scrape these users' tweets using Twint:

In [13]:
import twint
import nest_asyncio
import os
nest_asyncio.apply()

In [14]:
SEARCH_TERMS = ["sea","ice","melt","co2","methane","greenhouse","carbon",
               "emission","anthropogenic","temperature","warming","climate","coal","IPCC","environment",
               "environmental","cap and trade",'energy','planet','renewable','fossil fuel','environmentalist']
# TO DO: expand search_terms w/ some of Zach's terms

In [15]:
SCRAPE_DIR = '/per_user_search_term_scrapes'

In [16]:
def create_twint_config(user,end_date,search_term):
    c = twint.Config()
    c.Show_hashtags = True
    c.Count = True
    c.Stats = True
    c.Store_csv = True
    c.Retweets = False
    c.Custom["tweet"] = ["id", "date", "time", "tweet", "mentions", "replies_count", "retweets_count", 
                         "likes_count", "hashtags", "username"]
    c.Format = "ID {id} -||- Date {date} -||- Time {time} -||- Tweet {tweet} -||- Mentions {mentions} -||- \
    Repliescount {replies_count} \
    -||- Retweetscount {retweets_count} -||- Likescount {likes_count} -||- Hashtags {hashtags} -||- Username {username}"
    sep = " -||- "
    c.Username = user
    c.Until = end_date
    c.Search = search_term
    
    return c

# RESET

In [28]:
influential_csv.head(20)

Unnamed: 0,affirming,denying
0,algore,skepticscience
1,oxfamgb,ScienceNotDogma
2,globalactplan,Carbongate
3,GreenpeaceUK,Neets2245
4,WWF_Climate,Co2Green
5,OneClimate,thegwpfcom
6,Ed_Miliband,CO2Coalition
7,CarolineLucas,Action4Life_
8,DECCgovuk,fredpalmercoal
9,the_ecologist,NoTricksZone


In [2407]:
curr_deny = influential_csv.iloc[92]['denying']
print('Current denying user:',curr_deny)
#c.Username = curr_affirm

Current denying user: EcoSenseNow


## RESTART HERE

In [2519]:
config = create_twint_config(
    curr_deny,None,None)

In [2526]:
config.__dict__

{'Show_hashtags': True,
 'Count': True,
 'Stats': True,
 'Store_csv': True,
 'Retweets': False,
 'Format': 'ID {id} -||- Date {date} -||- Time {time} -||- Tweet {tweet} -||- Mentions {mentions} -||-     Repliescount {replies_count}     -||- Retweetscount {retweets_count} -||- Likescount {likes_count} -||- Hashtags {hashtags} -||- Username {username}',
 'Username': 'EcoSenseNow',
 'Until': '2013-04-30',
 'Search': 'environmentalist'}

In [2527]:
if config.Search == None:
    start_index = 0
else:
    start_index = SEARCH_TERMS.index(config.Search)

In [2528]:
start_index

21

In [2530]:
for ix in range(21,len(SEARCH_TERMS)):
    search_term = SEARCH_TERMS[ix]
    #config.Until = None
    config.Search = search_term
    config.Output = os.getcwd()+SCRAPE_DIR+'/{}_search-term={}.csv'.format(config.Username,config.Search)
    print('Getting tweets with keyword: ' + config.Search)#search_term in SEARCH_TERMS:
    #c.Search = search_term
    #c.Pandas = True
    #c.Store_object = True
    #c.Output = os.getcwd()+SCRAPE_DIR+'/{}_search-term={}.csv'.format(c.Username,c.Search)
    twint.run.Search(config)
    

Getting tweets with keyword: environmentalist


CRITICAL:root:twint.run:Twint:Feed:noDataExpecting value: line 1 column 1 (char 0)
CRITICAL:root:twint.run:Twint:Feed:noDataExpecting value: line 1 column 1 (char 0)
CRITICAL:root:twint.run:Twint:Feed:noDataExpecting value: line 1 column 1 (char 0)


[+] Finished: Successfully collected 0 Tweets from @EcoSenseNow.


In [2524]:
old_ix = ix

ix,search_term

(21, 'environmentalist')

In [2525]:
latest_tweets = pd.read_csv(os.getcwd()+SCRAPE_DIR+'/{}_search-term={}.csv'.format(config.Username,search_term))
latest_date = latest_tweets['date'].iloc[-1]
config = create_twint_config(curr_deny,latest_date,config.Search)

Organize scraped Tweets from all searchterms in df; remove duplicates.

In [2531]:
config.Username

'EcoSenseNow'

In [2532]:
user_tweets = []
for search_term in SEARCH_TERMS:
    try:
        tweets_per_search_term = pd.read_csv(os.getcwd()+SCRAPE_DIR+'/{}_search-term={}.csv'.format(config.Username,search_term))
        tweets_per_search_term['search_term'] = search_term
        user_tweets.append(tweets_per_search_term)
    except FileNotFoundError:
        pass

In [2533]:
len(user_tweets) # each one is a df

21

In [2534]:
user_tweets[0]

Unnamed: 0,id,date,time,tweet,mentions,replies_count,retweets_count,likes_count,hashtags,username,search_term
0,1190303540347977728,2019-11-01,09:24:13,And that the land is rising faster than the se...,['awco2011'],1,2,7,[],ecosensenow,sea
1,1182711109536509952,2019-10-11,10:34:36,Hah! I am 72 and I have seen sea foam many tim...,[],23,189,556,[],ecosensenow,sea
2,1181714167310368768,2019-10-08,16:33:07,"Predictions, predictions, all we ever get are...",[],20,477,836,[],ecosensenow,sea
3,1180495902428942336,2019-10-05,07:52:10,Interesting study showing marsh plants help bu...,[],4,81,182,[],ecosensenow,sea
4,1180141725739675648,2019-10-04,08:24:48,It is not “waste”. Used nuclear fuel can be re...,['mrsjanecoles'],11,22,94,[],ecosensenow,sea
...,...,...,...,...,...,...,...,...,...,...,...
485,406415906827292673,2013-11-29,05:34:37,"@MillenniaIT We already have thousands of ""fis...",[],1,0,0,[],ecosensenow,sea
486,406266734023147520,2013-11-28,19:41:52,"Total sea ice, Arctic and Antarctic, now highe...",[],4,30,7,[],ecosensenow,sea
487,401397113344368640,2013-11-15,09:11:44,@GreenGregDennis Says damage worse due to sea...,['greengregdennis'],1,0,0,[],ecosensenow,sea
488,397734134165950464,2013-11-05,06:36:21,@JWSpry @SteveSGoddard Meanwhile Antarctic sea...,['jwspry'],0,2,0,[],ecosensenow,sea


In [2535]:
full_tweets = pd.concat(user_tweets,ignore_index=True)
full_tweets.shape

(13633, 11)

In [2536]:
full_tweets['search_term'].value_counts()

co2                 4502
climate             2675
energy              1254
warming              805
ice                  734
carbon               577
coal                 513
temperature          498
sea                  490
IPCC                 344
fossil fuel          300
planet               247
greenhouse           220
renewable            131
environmental        125
environment           72
methane               49
melt                  39
environmentalist      33
emission              16
anthropogenic          9
Name: search_term, dtype: int64

In [2537]:
#from collections import Counter
counted_ids = Counter(full_tweets['id'])
print(len([(id_,counted_ids[id_]) for id_ in counted_ids if counted_ids[id_] > 1]))

2434


In [2538]:
np.sum([counted_ids[id_] for id_ in counted_ids if counted_ids[id_] > 1])

5222

In [2539]:
full_tweets.drop_duplicates(subset ="id", keep = 'first', inplace = True)
full_tweets.shape

(10845, 11)

In [2540]:
print(config.Username)

EcoSenseNow


In [2541]:
full_tweets.to_pickle('{}_unique_tweets.pkl'.format(config.Username))

In [2542]:
full_tweets = pd.read_pickle('{}_unique_tweets.pkl'.format(config.Username))

In [2543]:
full_tweets

Unnamed: 0,id,date,time,tweet,mentions,replies_count,retweets_count,likes_count,hashtags,username,search_term
0,1190303540347977728,2019-11-01,09:24:13,And that the land is rising faster than the se...,['awco2011'],1,2,7,[],ecosensenow,sea
1,1182711109536509952,2019-10-11,10:34:36,Hah! I am 72 and I have seen sea foam many tim...,[],23,189,556,[],ecosensenow,sea
2,1181714167310368768,2019-10-08,16:33:07,"Predictions, predictions, all we ever get are...",[],20,477,836,[],ecosensenow,sea
3,1180495902428942336,2019-10-05,07:52:10,Interesting study showing marsh plants help bu...,[],4,81,182,[],ecosensenow,sea
4,1180141725739675648,2019-10-04,08:24:48,It is not “waste”. Used nuclear fuel can be re...,['mrsjanecoles'],11,22,94,[],ecosensenow,sea
...,...,...,...,...,...,...,...,...,...,...,...
13628,584559955954532352,2015-04-04,20:35:28,RT @rln_nelson: .Cut me slack! That was early ...,"['rln_nelson', 'kaboom1776', 'appflyer']",0,3,4,[],ecosensenow,environmentalist
13629,426058118141730816,2014-01-22,10:25:45,"@BsBioNick @Greenpeace Glad you like ""Confessi...","['bsbionick', 'greenpeace']",1,0,0,[],ecosensenow,environmentalist
13630,419139485071187968,2014-01-03,08:13:35,"@RadicalOmnivore Way too ""serious"". I am not j...",['radicalomnivore'],1,0,0,[],ecosensenow,environmentalist
13631,332198880563318785,2013-05-08,11:22:39,The making of a #sensible #environmentalist. ...,[],0,3,2,"['#sensible', '#environmentalist']",ecosensenow,environmentalist


In [1]:
import os
len(os.listdir(os.getcwd()+'/affirm_tweets/'))

88

In [6]:
os.listdir(os.getcwd()+'/deny_tweets/')

['50ShadesGreenNZ_unique_tweets.pkl',
 '_My2_Cents__unique_tweets.pkl',
 'Action4Life__unique_tweets.pkl',
 'adapt2030_unique_tweets.pkl',
 'AGWHOAX1_unique_tweets.pkl',
 'AmazingPatriot_unique_tweets.pkl',
 'Angels_of_Truth_unique_tweets.pkl',
 'caleb_rossiter_unique_tweets.pkl',
 'Carbongate_unique_tweets.pkl',
 'ccdeditor_unique_tweets.pkl',
 'ChrisMartzWX_unique_tweets.pkl',
 'clim8resistance_unique_tweets.pkl',
 'ClimateDepot_unique_tweets.pkl',
 'ClimatePoet_unique_tweets.pkl',
 'ClimateRealists_unique_tweets.pkl',
 'ClimateSkeptic_unique_tweets.pkl',
 'ClimatismBlog_unique_tweets.pkl',
 'co2_the_unique_tweets.pkl',
 'CO2Coalition_unique_tweets.pkl',
 'Co2Green_unique_tweets.pkl',
 'CO2notes_unique_tweets.pkl',
 'CoolsensuS_unique_tweets.pkl',
 'DaveMyFace_unique_tweets.pkl',
 'DenierBot_unique_tweets.pkl',
 'dieselfastpitch_unique_tweets.pkl',
 'digitaldoc4_unique_tweets.pkl',
 'drwaheeduddin_unique_tweets.pkl',
 'EcoSenseNow_unique_tweets.pkl',
 'EdBohman_unique_tweets.pkl',
 '

In [10]:
all_deny_tweets = []
for file in os.listdir(os.getcwd()+'/deny_tweets/'):
    tweets_per_user = pd.read_pickle(os.getcwd()+'/deny_tweets/'+file)
    all_deny_tweets.append(tweets_per_user)

In [11]:
deny_tweets = pd.concat(all_deny_tweets)
deny_tweets.shape

(290084, 11)

In [6]:
all_affirm_tweets = []
for file in os.listdir(os.getcwd()+'/affirm_tweets/'):
    tweets_per_user = pd.read_pickle(os.getcwd()+'/affirm_tweets/'+file)
    all_affirm_tweets.append(tweets_per_user)

In [7]:
affirm_tweets = pd.concat(all_affirm_tweets)
affirm_tweets.shape

(230802, 11)

In [12]:
deny_tweets.columns

Index(['id', 'date', 'time', 'tweet', 'mentions', 'replies_count',
       'retweets_count', 'likes_count', 'hashtags', 'username', 'search_term'],
      dtype='object')

In [13]:
%matplotlib inline

In [22]:
deny_tweets['username'].value_counts()[:10]

tan123             39788
carbongate         27694
friendsoscience    21829
tony__heller       19680
jwspry             15452
climaterealists    13339
ccdeditor          11077
sylviad32911201    10862
ecosensenow        10845
climatedepot       10388
Name: username, dtype: int64

In [23]:
affirm_tweets['username'].value_counts()[:10]

climatehome        23722
grist              18293
tveitdal           15355
climateprogress    12718
james_bg           10125
mikehudema          8658
revkin              8406
leohickman          7157
andrealearned       7107
pauledawson         6750
Name: username, dtype: int64

In [19]:
engagement_df = pd.DataFrame.from_dict({'deny':
                                        {'retweets':
                                         {'mean':round(deny_tweets['retweets_count'].mean(),1),
                                         'min':round(deny_tweets['retweets_count'].min(),1),
                                         'max':round(deny_tweets['retweets_count'].max(),1)},
                                        'likes':
                                         {'mean':round(deny_tweets['likes_count'].mean(),1),
                                         'min':round(deny_tweets['likes_count'].min(),1),
                                         'max':round(deny_tweets['likes_count'].max(),1)}
                                        },
                                       'affirm':
                                        {
                                            'retweets':
                                            {'mean':round(affirm_tweets['retweets_count'].mean(),1),
                                         'min':round(affirm_tweets['retweets_count'].min(),1),
                                         'max':round(affirm_tweets['retweets_count'].max(),1)},
                                        'likes':
                                         {'mean':round(affirm_tweets['likes_count'].mean(),1),
                                         'min':round(affirm_tweets['likes_count'].min(),1),
                                         'max':round(affirm_tweets['likes_count'].max(),1)}
                                            
                                        }
                                       })
engagement_df

Unnamed: 0,deny,affirm
retweets,"{'mean': 15.3, 'min': 0, 'max': 97641}","{'mean': 38.2, 'min': 0, 'max': 87661}"
likes,"{'mean': 41.3, 'min': 0, 'max': 222429}","{'mean': 83.1, 'min': 0, 'max': 430370}"


In [64]:
deny_tweets.loc[deny_tweets['retweets_count'] == deny_tweets['retweets_count'].max()]

Unnamed: 0_level_0,date,time,tweet,mentions,replies_count,retweets_count,likes_count,hashtags,username,search_term
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
265895292191248385,2012-11-06,11:15:52,The concept of global warming was created by a...,[],12724,97641,67914,[],realdonaldtrump,warming


In [25]:
deny_tweets.loc[deny_tweets['likes_count'] == deny_tweets['likes_count'].max()]

Unnamed: 0,id,date,time,tweet,mentions,replies_count,retweets_count,likes_count,hashtags,username,search_term
283,1089691985164800001,2019-01-27,17:09:49,"After all that I have done for the Military, o...",[],65462,45667,222429,[],realdonaldtrump,energy


In [68]:
deny_tweets.loc[1089691985164800001]['tweet']

'After all that I have done for the Military, our great Veterans, Judges (99), Justices (2), Tax & Regulation Cuts, the Economy, Energy, Trade & MUCH MORE, does anybody really think I won’t build the WALL? Done more in first two years than any President! MAKE AMERICA GREAT AGAIN!'

In [26]:
affirm_tweets.loc[affirm_tweets['retweets_count'] == affirm_tweets['retweets_count'].max()]

Unnamed: 0,id,date,time,tweet,mentions,replies_count,retweets_count,likes_count,hashtags,username,search_term
11,1151868495824642049,2019-07-18,07:57:04,Reminder of what people are calling the “radic...,[],45949,87661,311822,[],aoc,energy


In [70]:
affirm_tweets = affirm_tweets.set_index('id')

In [71]:
affirm_tweets.loc[1151868495824642049]['tweet']

'Reminder of what people are calling the “radical, extreme-left agenda”:\n\n✅ Medicare for All\n✅ A Living Wage & Labor Rights\n✅ K-16 schooling, aka Public Colleges\n✅ 100% Renewable Energy\n✅ Fixing the pipes in Flint\n✅ Not Hurting Immigrants\n✅ Holding Wall Street Accountable'

In [27]:
affirm_tweets.loc[affirm_tweets['likes_count'] == affirm_tweets['likes_count'].max()]

Unnamed: 0,id,date,time,tweet,mentions,replies_count,retweets_count,likes_count,hashtags,username,search_term
14,1178002285751865344,2019-09-28,10:43:25,I have moved on from this climate thing... Fro...,[],6157,59014,430370,[],gretathunberg,climate


In [72]:
affirm_tweets.loc[1178002285751865344]['tweet']

'I have moved on from this climate thing... From now on I will be doing death metal only!!  https://twitter.com/aliamjadrizvi/status/1177947891945627648\xa0…'

In [34]:
deny_tweets.sort_values('retweets_count',ascending=False).head(1000)['username'].value_counts()

ecosensenow        301
realdonaldtrump    262
manny_ottawa       179
tony__heller        83
junkscience         29
electroversenet     27
jwspry              15
tan123              15
nikolovscience      14
jaggermickoz        12
seanhannity          9
drwaheeduddin        8
climaterealists      8
msroberts0619        6
thegwpfcom           5
sylviad32911201      5
rokoshme             3
thiagomaia2503       2
wattsupwiththat      2
sciencenotdogma      2
clim8resistance      1
notrickszone         1
willynerdal1         1
skepticscience       1
ike_kiefer           1
grand_solar          1
rogtallbloke         1
carbongate           1
uk_ecology           1
action4life_         1
grandsolarmin        1
climatepoet          1
pplonia              1
Name: username, dtype: int64

In [36]:
affirm_tweets.sort_values('retweets_count',ascending=False).head(1000)['username'].value_counts()

mikehudema         159
gretathunberg      148
carolinelucas      132
leodicaprio        120
algore              66
billmckibben        59
aoc                 52
naomiaklein         42
sunrisemvmt         34
khayhoe             33
tveitdal            22
eriksolheim         15
cathmckenna         12
ed_miliband         10
unfccc              10
leohickman           7
yalee360             7
markruffalo          6
extinctionr          5
nrdc                 5
greenpeaceuk         5
dwallacewells        5
dpcarrington         4
chrisgpackham        4
guardianeco          3
grist                3
pauledawson          3
bradplumer           3
usclimatestrike      2
simondonner          2
michaelemann         2
thisiszerohour       2
climateprogress      2
sierraclub           2
revkin               1
maryheglar           1
johnupton            1
lisalsong            1
350                  1
ayanaeliza           1
billnye              1
climatehome          1
jayinslee            1
james_bg   

In [37]:
affirm_tweets.sort_values('likes_count',ascending=False).head(1000)['username'].value_counts()

gretathunberg      162
carolinelucas      128
leodicaprio        127
mikehudema         124
algore              72
aoc                 55
sunrisemvmt         53
naomiaklein         49
khayhoe             37
billmckibben        34
cathmckenna         28
eriksolheim         17
tveitdal            16
markruffalo         14
maryheglar          10
ed_miliband          9
extinctionr          6
leohickman           6
jayinslee            5
chrisgpackham        5
bradplumer           4
dpcarrington         4
grist                3
nrdc                 3
michaelemann         2
unfccc               2
drkatemarvel         2
amelia_womack        2
greenpeaceuk         2
usclimatestrike      2
evanlweber           1
climatehome          1
anniemleonard        1
guardianeco          1
pauledawson          1
350                  1
estherngumbi         1
yalee360             1
sasjabeslik          1
thisiszerohour       1
sierraclub           1
simondonner          1
johnupton            1
the_ecologi

In [18]:
print(deny_tweets['retweets_count'].mean())
print(deny_tweets['retweets_count'].max())
print(deny_tweets['retweets_count'].min())

15.321327615449318
97641
0


In [20]:
print(deny_tweets['likes_count'].mean())
print(deny_tweets['likes_count'].max())
print(deny_tweets['likes_count'].min())

41.273731057211016
222429
0


In [19]:
print(affirm_tweets['retweets_count'].mean())
print(affirm_tweets['retweets_count'].max())
print(affirm_tweets['retweets_count'].min())

38.24847704959229
87661
0


In [21]:
print(affirm_tweets['likes_count'].mean())
print(affirm_tweets['likes_count'].max())
print(affirm_tweets['likes_count'].min())

83.08710496442839
430370
0


In [None]:
deny_tweets.plot.hist(by='retweets_count',bins=10)