In [1]:
import pickle
import numpy as np
import pandas as pd
from collections import Counter

In [2]:
# Functions for importing & cleaning relevant tweets
def lower(s):
    return s.lower()

def tweet_imports(filename):
    imp = pd.read_pickle(filename)
    imp = imp.drop_duplicates()
    imp['tweet_clean'] = imp['tweet'].str.replace('http\S+|www.\S+|pic.twitter.com\S+', '', case=False)
    imp['tweet_clean'] =imp['tweet_clean'].replace('[^A-Za-z0-9 ]+','',regex=True)
    imp['tweet_clean'] = imp['tweet_clean'].apply(lower)#map(lambda x: x.lower(), imp['tweet_clean'])
    imp['date'] = pd.to_datetime(imp['date'])
    return imp

In [3]:
def log_odds(l1,l2):
    counts_l1 = Counter(l1)
    counts_l2 = Counter(l2)
    counts_l1.update({t: 0.5 for t in counts_l2 if t not in counts_l1})
    counts_l2.update({t: 0.5 for t in counts_l1 if t not in counts_l2})
    sum_1 = sum(counts_l1.values())
    sum_2 = sum(counts_l2.values())
    freqs_1 = {t: counts_l1[t]*1./sum_1 for t in counts_l1}
    freqs_2 = {t: counts_l2[t]*1./sum_2 for t in counts_l2}
    odds_1 = {t: freqs_1[t]/(1-freqs_1[t]) for t in freqs_1}
    odds_2 = {t: freqs_2[t]/(1-freqs_2[t]) for t in freqs_2}
    odds_ratios = {t: odds_1[t]/odds_2[t] for t in odds_1}
    return {t: np.log(odds_ratios[t]) for t in odds_ratios}

In [4]:
from nltk.tokenize import MWETokenizer
tk = MWETokenizer()
tk.add_mwe(('climate','change'))
tk.add_mwe(('global','warming'))
tk.add_mwe(('one','world'))
tk.add_mwe(('new','jobs'))
tk.add_mwe(('carbon','tax'))
tk.add_mwe(('carbon','neutral'))

In [68]:
tk.tokenize('Global warming is exaggerated'.lower().split())

['global_warming', 'is', 'exaggerated']

In [39]:
import nltk

from nltk.stem.porter import *
stemmer = PorterStemmer()

In [137]:
import re

In [156]:
liwc_dict = {}
with open('/Users/yiweiluo/Downloads/datasets/en_liwc.txt','r') as f:
    for line in f:
        split_line = line.strip().split(': ')
        cat = split_line[0]
        cat_words = split_line[1].split()
        cat_words = [w.split('*')[0] for w in cat_words]
        liwc_dict[cat] = cat_words

In [146]:
liwc_dict.keys()

dict_keys(['Ppron', 'Inhib', 'Space', 'Filler', 'Ipron', 'Time', 'Percept', 'Verbs', 'Quant', 'Discrep', 'Relativ', 'Affect', 'You', 'Cause', 'Prep', 'Relig', 'Body', 'Bio', 'We', 'Assent', 'Incl', 'Leisure', 'AuxVb', 'Hear', 'They', 'Posemo', 'Article', 'Excl', 'Home', 'Friends', 'Present', 'Numbers', 'CogMech', 'I', 'Work', 'Tentat', 'Ingest', 'Motion', 'Anger', 'Achiev', 'Swear', 'Death', 'Social', 'Nonflu', 'Family', 'Pronoun', 'Funct', 'Feel', 'Certain', 'Insight', 'Humans', 'Sad', 'Past', 'See', 'Future', 'Adverbs', 'SheHe', 'Money', 'Negate', 'Health', 'Conj', 'Anx', 'Negemo', 'Sexual'])

In [157]:
pd.DataFrame.from_dict(liwc_dict,orient='index')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,906,907,908,909,910,911,912,913,914,915
Ppron,y'all,ive,weve,she'll,you'd,thoust,mine,his,shes,theyd,...,,,,,,,,,,
Inhib,discourag,held,duti,hesita,harness,blocked,suppress,yield,tight,contradic,...,,,,,,,,,,
Space,bending,breadth,taller,spaced,area,shaping,over,remote,bottom,roommate,...,,,,,,,,,,
Filler,ykn,rr,ohwell,blah,imean,yakno,youknow,idontknow,like,,...,,,,,,,,,,
Ipron,this,whats,thatll,itd,it,whatever,anyone,itself,somebod,nobod,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Health,gland,therap,swelling,sickly,thermometer,addict,wheez,thyroid,physician,twitch,...,,,,,,,,,,
Conj,and,altho,as,because,if,till,whereas,when,cuz,til,...,,,,,,,,,,
Anx,stunned,shaky,terror,fear,hesita,tense,nervous,fearful,inhib,reluctan,...,,,,,,,,,,
Negemo,gloom,liabilit,vile,tortur,protest,ugh,troubl,hating,messy,hate,...,,,,,,,,,,


In [135]:
RELEVANT_LIWC_CATS = ['Inhib','Affect','Cause','Relig','Body','Bio','Posemo','Home','Work','Anger','Social',
                     'Family','Humans','Sad','Future','Past','Money','Negate','Health','Negemo']

In [167]:
def log_odds_liwc(odds_dict,liwc_cat):
    liwc_words = liwc_dict[liwc_cat]
    liwc_odds = {w: odds_dict[w] for w in liwc_words if w in odds_dict}
    return liwc_odds

# News data

In [7]:
news_df = pd.read_pickle('/Users/yiweiluo/Dropbox/research/QP2/code/Fox_and_friends/\
all_urls_meta_and_fulltext_df_2020.pkl')

In [15]:
news_df = news_df.reset_index()
climate_news = news_df[news_df['topic'] == 'cc']

In [16]:
climate_news.head()

Unnamed: 0,index,bad NYT,date,domain,fulltext,stance,title,topic,url
1600,1600,False,,:///,If you can't sleep as much as you need durin...,anti,,cc,foxnews.com/lifestyle/climate-change-might-be-...
1601,1601,False,,:///,Paper says draft document by scientists direct...,anti,,cc,foxnews.com/opinion/climate-policy-get-ready-f...
1602,1602,False,,:///,The public's concern about global warming has ...,anti,,cc,foxnews.com/opinion/physicist-dont-fall-for-th...
1603,1603,False,,:///,The federal report by dozens of U.S. governmen...,anti,,cc,foxnews.com/politics/junk-science-studies-behi...
1604,1604,False,,:///,File photo - A statue of the Virgin Mary is ...,anti,,cc,foxnews.com/science/10-years-after-hurricane-k...


In [67]:
climate_news[climate_news['stance'] == 'anti'].shape

(5067, 9)

Tokenize and pre-process fulltext from each side:

In [89]:
pro_news_tokens = [tk.tokenize(climate_news.iloc[row_ix]['fulltext'].lower().split()) 
                   for row_ix in range(len(climate_news.index))
                  if climate_news.iloc[row_ix]['fulltext'] is not None and 
                   len(climate_news.iloc[row_ix]['fulltext']) > 0 and climate_news.iloc[row_ix]['stance'] == 'pro']
anti_news_tokens = [tk.tokenize(climate_news.iloc[row_ix]['fulltext'].lower().split()) 
                    for row_ix in range(len(climate_news.index))
                  if climate_news.iloc[row_ix]['fulltext'] is not None and 
                    len(climate_news.iloc[row_ix]['fulltext']) > 0 and climate_news.iloc[row_ix]['stance'] == 'anti']

In [93]:
pro_news_tokens = [item for sublist in pro_news_tokens for item in sublist]
anti_news_tokens = [item for sublist in anti_news_tokens for item in sublist]

In [175]:
pickle.dump(pro_news_tokens,open('pro_news_tokens.pkl','wb'))
pickle.dump(anti_news_tokens,open('anti_news_tokens.pkl','wb'))

Remove punctuation from tokens and get stems:

In [96]:
import string
string.punctuation
punctuations = string.punctuation + '“'

In [174]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [97]:
pro_news_tokens = [item.translate(str.maketrans('', '', punctuations))
                   for item in pro_news_tokens]
anti_news_tokens = [item.translate(str.maketrans('', '', punctuations))
                   for item in anti_news_tokens]

In [102]:
from spacy.lemmatizer import Lemmatizer
#from spacy.lookups import Lookups
#lookups = Lookups()
#lookups.add_table("lemma_rules", {"noun": [["s", ""]]})
lemmatizer = Lemmatizer()
lemmas = lemmatizer("ducks", "NOUN")

import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

In [103]:
lemmatizer.lemmatize('global_warming')

'global_warming'

In [104]:
lemmatizer.lemmatize('ducks')

'duck'

In [136]:
# pro_news_lemmas = [lemmatizer.lemmatize(item) for item in pro_news_tokens]
# anti_news_lemmas = [lemmatizer.lemmatize(item) for item in anti_news_tokens]
pro_news_stems = [stemmer.stem(item) for item in pro_news_tokens]
anti_news_stems = [stemmer.stem(item) for item in anti_news_tokens]

In [109]:
anti_news_lemmas[:50]

['if',
 'you',
 'cant',
 'sleep',
 'a',
 'much',
 'a',
 'you',
 'need',
 'during',
 'the',
 'week',
 'you',
 'may',
 'be',
 'able',
 'to',
 'make',
 'up',
 'for',
 'it',
 'on',
 'the',
 'weekend',
 'istock',
 'rising',
 'temperature',
 'caused',
 'by',
 'climatechange',
 'seem',
 'to',
 'be',
 'disrupting',
 'america’s',
 'sleep',
 'pattern',
 'according',
 'to',
 'a',
 'new',
 'study',
 'a',
 'temperature',
 'increase',
 'of',
 '18',
 'degree',
 'fahrenheit',
 'cause']

In [106]:
print(len(pro_news_lemmas))
print(len(anti_news_lemmas))

23597279
3577618


In [158]:
# pickle.dump(pro_news_lemmas,open('pro_news_lemmas.pkl','wb'))
# pickle.dump(anti_news_lemmas,open('anti_news_lemmas.pkl','wb'))
pickle.dump(pro_news_stems,open('pro_news_stems.pkl','wb'))
pickle.dump(anti_news_stems,open('anti_news_stems.pkl','wb'))

In [163]:
#pro_anti_news_odds = log_odds(pro_news_lemmas,anti_news_lemmas)
pro_anti_news_odds_stems = log_odds(pro_news_stems,anti_news_stems)

In [164]:
#sorted_pro_anti_news_odds = sorted(pro_anti_news_odds.items(), key=lambda x:x[1])
sorted_pro_anti_news_odds_stems = sorted(pro_anti_news_odds_stems.items(), key=lambda x:x[1])

In [123]:
sorted_pro_anti_news_odds[-100:]

[('amory', 4.397528773343627),
 ('inaudible', 4.403259575107505),
 ('zabel', 4.412727443744629),
 ('bello', 4.416493968891274),
 ('lovins', 4.418383075494853),
 ('discontinuing', 4.429577476347927),
 ('pensum', 4.435116692745144),
 ('typemates', 4.435116692745144),
 ('serif', 4.43879998051262),
 ('classsocialbuttonhighlightspan', 4.43879998051262),
 ('kouddous', 4.467787856195369),
 ('energypol', 4.467787856195369),
 ('g77', 4.478445277722948),
 ('potosí', 4.478445277722948),
 ('lidy', 4.492480765244145),
 ('dargis', 4.513171412608864),
 ('naidoo', 4.513171412608864),
 ('shiva', 4.5300925018533125),
 ('btnffcf0d', 4.546732032613432),
 ('nacpil', 4.550026970861521),
 ('getcookiesessname', 4.550026970861521),
 ('kumi', 4.5565844561101585),
 ('angelica', 4.559860009965532),
 ('abdel', 4.563099221833838),
 ('font', 4.591901593898817),
 ('kamat', 4.6013209428690995),
 ('nytoday', 4.644141532969928),
 ('wahu', 4.653083597399471),
 ('kaara', 4.653083597399471),
 ('alternateside', 4.6852036126

In [165]:
#dict_odds = dict(pro_anti_news_odds)
dict_odds_stems = dict(pro_anti_news_odds_stems)

In [120]:
dict_odds['cause']

-0.30371170224634875

In [118]:
list(dict_odds.keys())[:5]

['this', 'matter', 'is', 'best', 'disposed']

In [57]:
dict_odds['matter']

0.12973077638690833

## Of specific LIWC categories

In [171]:
for cat in RELEVANT_LIWC_CATS:
    odds = log_odds_liwc(dict_odds_stems,cat)
    pos = [w for w in odds if odds[w] > 0]
    neg = [w for w in odds if odds[w] < 0]
    print(cat,'Num pos:'+str(len(pos)),'Num neg:'+str(len(neg)))
    print('\n')

Inhib Num pos:31 Num neg:42


Affect Num pos:268 Num neg:263


Cause Num pos:19 Num neg:33


Relig Num pos:46 Num neg:55


Body Num pos:65 Num neg:58


Bio Num pos:208 Num neg:148


Posemo Num pos:127 Num neg:95


Home Num pos:35 Num neg:24


Work Num pos:111 Num neg:88


Anger Num pos:43 Num neg:75


Social Num pos:133 Num neg:94


Family Num pos:21 Num neg:9


Humans Num pos:16 Num neg:12


Sad Num pos:30 Num neg:28


Future Num pos:2 Num neg:15


Past Num pos:47 Num neg:20


Money Num pos:77 Num neg:33


Negate Num pos:4 Num neg:23


Health Num pos:84 Num neg:55


Negemo Num pos:139 Num neg:165




In [169]:
sorted(log_odds_liwc(dict_odds_stems,'Inhib').items(),key=lambda x:x[1],reverse=True)

[('abstain', 2.3573022259258765),
 ('taboo', 0.9783270965197011),
 ('uptight', 0.9103730160703509),
 ('repress', 0.8860457659255339),
 ('blocker', 0.8458350176972818),
 ('rigid', 0.808097016349421),
 ('stiff', 0.6941521640551315),
 ('constrain', 0.6585042594091605),
 ('neglect', 0.45839466669439227),
 ('prudish', 0.4403692597655841),
 ('kept', 0.3281326334128129),
 ('wait', 0.3086869412706238),
 ('suppress', 0.295088870798877),
 ('forbid', 0.2876075201285799),
 ('avert', 0.26027176148051473),
 ('stubborn', 0.24799821262081412),
 ('block', 0.22453883580060882),
 ('hangup', 0.2172256991763508),
 ('conflict', 0.20342348723609113),
 ('limit', 0.20230364795760597),
 ('bound', 0.1493517243490618),
 ('discourag', 0.14416838782199048),
 ('tidi', 0.13718344655201958),
 ('disciplin', 0.10722621919152978),
 ('curtail', 0.10055925795084329),
 ('avoid', 0.09716301072273872),
 ('delay', 0.0658976857763626),
 ('duti', 0.05160729774691386),
 ('disregard', 0.04979310634081989),
 ('rein', 0.026273651598

In [170]:
sorted(log_odds_liwc(dict_odds_stems,'Family').items(),key=lambda x:x[1],reverse=True)

[('exhusband', 1.538981971963861),
 ('aunt', 1.4336253116748678),
 ('wive', 1.0822257307087768),
 ('stepchild', 1.0281560940797485),
 ('mother', 0.810968578375855),
 ('grandson', 0.7496928670305765),
 ('daughter', 0.6498833711072465),
 ('father', 0.6415741511502484),
 ('brother', 0.5150061307002347),
 ('grandkid', 0.4891600366587553),
 ('bro', 0.4150519374458191),
 ('wife', 0.4114039892250725),
 ('sister', 0.39171447217692335),
 ('spous', 0.3890774461785668),
 ('ma', 0.3350114899330635),
 ('parent', 0.26919839683769536),
 ('cousin', 0.1961739967517232),
 ('grandpa', 0.15268720184174966),
 ('husband', 0.14847888980385401),
 ('kin', 0.11186527873048616),
 ('son', 0.009875050648873217),
 ('granddad', -0.07045644870641832),
 ('nephew', -0.0704566249324903),
 ('pa', -0.12127684007382324),
 ('mum', -0.3361599943861463),
 ('mom', -0.511360607779277),
 ('ex', -0.518481823848564),
 ('dad', -0.5688226281709716),
 ('momma', -0.9459255103565274),
 ('stepkid', -1.1690688220805328)]

In [173]:
sorted(log_odds_liwc(dict_odds_stems,'Health').items(),key=lambda x:x[1],reverse=True)

[('pediatr', 2.8562874335146753),
 ('sickest', 2.7627589282960634),
 ('throb', 2.51981228380179),
 ('flu', 2.4316736580705625),
 ('hiv', 2.243185380020233),
 ('estrogen', 2.2321297878193933),
 ('thyroid', 2.0890287747665237),
 ('sicker', 1.7213036558169006),
 ('iv', 1.6493318224902662),
 ('viagra', 1.6035205354544586),
 ('mono', 1.3958810436171398),
 ('xanax', 1.3958810436171398),
 ('gynecolog', 1.3158382935905824),
 ('obes', 1.1909341410522007),
 ('aspirin', 1.1335169392867923),
 ('asthma', 1.07377949222558),
 ('intox', 0.9999868463115243),
 ('physician', 0.9936238725240122),
 ('addict', 0.9899880227133848),
 ('pill', 0.9826980332180297),
 ('lymph', 0.9103730160703509),
 ('mammogram', 0.9103730160703509),
 ('cancer', 0.8511253711429082),
 ('checkup', 0.8458346550698607),
 ('sick', 0.8226372337488799),
 ('inflam', 0.7945423215062507),
 ('orthoped', 0.7768415810928161),
 ('cardio', 0.7768415810928161),
 ('dose', 0.7367691370340242),
 ('stiff', 0.6941521640551315),
 ('vomit', 0.676759545

# Twitter data

In [7]:
pro_sentences_fuzzy_matched = pickle.load(open('fuzzy_matched_pro_sents.pkl','rb'))
anti_sentences_fuzzy_matched = pickle.load(open('fuzzy_matched_anti_sents.pkl','rb'))

In [8]:
anti_sentences_fuzzy_matched[:5]

[['finally',
  'this',
  'is',
  'being',
  'picked',
  'up',
  'like',
  'good',
  'cheese',
  'its',
  'taken',
  'a',
  'while',
  'for',
  'median',
  'to',
  'pick',
  'up',
  'but',
  'good',
  'clmate',
  'shocker',
  'nzs',
  'zero',
  'carbo',
  'bill',
  'goes',
  'too',
  'far',
  'breaches',
  'pariss',
  'clmate',
  'agreemen'],
 ['appreciating',
  'this',
  'pieces',
  'in',
  'atest',
  'edition',
  'of',
  'the',
  'listener',
  'we',
  'note',
  'tough',
  'its',
  'not',
  'just',
  'intentional',
  'investor',
  'its',
  'carbo',
  'investor',
  'per'],
 ['addressing',
  'this',
  'in',
  'a',
  'resonable',
  'and',
  'considered',
  'manner',
  'woulda',
  'be',
  'idea',
  'zero',
  'carbo',
  'bill',
  'target',
  'unachievable',
  'retiring',
  'nationals',
  'mp'],
 ['love',
  'a',
  'good',
  'challenges',
  'posted',
  'on',
  'behalf',
  'of',
  'willie',
  'and',
  'angela',
  'falloon',
  'we',
  'belive',
  'we',
  'all',
  'have',
  'a',
  'part',
  'to'

In [46]:
cleaned_affirm_tweets

Unnamed: 0,id,date,time,tweet,mentions,replies_count,retweets_count,likes_count,hashtags,username,search_term,label,tweet_clean
0,1177060829901885441,2019-09-25,20:22:25,#IPCC just released the #SROCC - a new report ...,[],1,40,75,"['#ipcc', '#srocc']",350,ice,1,ipcc just released the srocc a new report on ...
1,1176786922687148032,2019-09-25,02:14:00,The #IPCC special report on ocean and ice is o...,[],5,83,112,"['#ipcc', '#srocc']",350,ice,1,the ipcc special report on ocean and ice is ou...
2,1164031877910618114,2019-08-20,21:30:01,Unusually warm water surrounding one of the la...,[],1,62,93,[],350,ice,1,unusually warm water surrounding one of the la...
3,1152577659710427136,2019-07-20,06:55:02,This is one of the hottest summers on record. ...,[],0,17,26,['#abolishice'],350,ice,1,this is one of the hottest summers on record t...
4,1146328119336460288,2019-07-03,01:01:36,Antarctic ice has taken a nosedive.\n\nThe amo...,[],6,106,115,[],350,ice,1,antarctic ice has taken a nosedivethe amount o...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,1175370661985828865,2019-09-21,04:26:17,As #GretaThunberg led the Climate Strike in Ne...,[],0,0,2,"['#gretathunberg', '#nantichaocharoenchai', '#...",yv4ca,climate,1,as gretathunberg led the climate strike in new...
2,1169913415294259200,2019-09-06,03:01:08,The world is running out of time 🤜a race we ca...,[],0,1,5,"['#apclimateweek', '#apyouth4climate']",yv4ca,climate,1,the world is running out of time a race we can...
3,1169535040792850437,2019-09-05,01:57:37,Will you Fight 🤜for the future! \n\nJoin us to...,[],14,149,277,"['#climateemergency', '#apclimateweek', '#apyo...",yv4ca,climate,1,will you fight for the future join us to empow...
4,1169518364760276997,2019-09-05,00:51:21,"""Everyone has a responsible role towards tackl...",['earthdaynetwork'],0,3,6,"['#apclimateweek', '#apyouth4climate']",yv4ca,climate,1,everyone has a responsible role towards tackli...


In [22]:
cleaned_affirm_tweets = tweet_imports('all_affirm_tweets.pkl')
cleaned_deny_tweets = tweet_imports('all_deny_tweets.pkl')

In [29]:
pro_sentences = [tk.tokenize(tweet.lower().split()) for tweet in cleaned_affirm_tweets['tweet_clean']]
anti_sentences = [tk.tokenize(tweet.lower().split()) for tweet in cleaned_deny_tweets['tweet_clean']]

In [None]:
# TO DO: fuzzy matching after MWE tokenization

In [41]:
fuzzy_pro_toks = [item for sublist in pro_sentences_fuzzy_matched for item in sublist]
fuzzy_anti_toks = [item for sublist in anti_sentences_fuzzy_matched for item in sublist]
#pro_toks = [item for sublist in pro_sentences for item in sublist if len(item) < 15]
#anti_toks = [item for sublist in anti_sentences for item in sublist if len(item) < 15]

In [36]:
pro_sentences[:5]

[['ipcc',
  'just',
  'released',
  'the',
  'srocc',
  'a',
  'new',
  'report',
  'on',
  'oceans',
  'and',
  'ice',
  'it',
  'reminds',
  'us',
  'of',
  'these',
  'powerful',
  'words',
  'from',
  'kathy',
  'and',
  'aka',
  'one',
  'year',
  'ago',
  'rewatch',
  'the',
  'stunning',
  '6minute',
  'film',
  'at'],
 ['the',
  'ipcc',
  'special',
  'report',
  'on',
  'ocean',
  'and',
  'ice',
  'is',
  'out',
  'and',
  'well',
  'be',
  'honest',
  'it',
  'looks',
  'bleak',
  'but',
  'we',
  'know',
  'what',
  'must',
  'be',
  'done',
  'the',
  'age',
  'of',
  'fossil',
  'fuels',
  'must',
  'endread',
  'here',
  'to',
  'find',
  'out',
  'what',
  'this',
  'science',
  'means',
  'to',
  'people',
  'on',
  'the',
  'frontlines',
  'srocc'],
 ['unusually',
  'warm',
  'water',
  'surrounding',
  'one',
  'of',
  'the',
  'largest',
  'glaciers',
  'in',
  'greenland',
  'isnt',
  'good',
  'newsa',
  'billion',
  'tons',
  'of',
  'ice',
  'lost',
  'here',
  

In [42]:
#pro_anti_log_odds_ratios = log_odds(pro_toks,anti_toks)
fuzzy_pro_anti_log_odds_ratios = log_odds(fuzzy_pro_toks,fuzzy_anti_toks)

In [43]:
#sorted_pro_anti_log_odds_ratios = sorted(pro_anti_log_odds_ratios.items(),key=lambda x:x[1],reverse=True)
fuzzy_sorted_pro_anti_log_odds_ratios = sorted(fuzzy_pro_anti_log_odds_ratios.items(),key=lambda x:x[1],reverse=True)

In [39]:
sorted_pro_anti_log_odds_ratios[:50]

[('rtdesmoguk', 8.18272521122586),
 ('icym', 8.109490114325467),
 ('go100re', 7.936759117681209),
 ('waterislife', 7.167234908505245),
 ('bikes4climate', 7.022305389143198),
 ('climatecurate', 6.963684359190483),
 ('notmx', 6.82901593928712),
 ('rtclientearth', 6.7797713782944875),
 ('rtcc', 6.527833836991624),
 ('stoppipelines', 6.414473841443638),
 ('climatemegan', 6.343901845604923),
 ('globalactplan', 6.28869820638638),
 ('inkl', 6.27311275447586),
 ('rtmcswee', 6.191319717006068),
 ('aces', 6.188516731153423),
 ('wemeanit', 6.132821107357468),
 ('powertoswitch', 6.004435134792601),
 ('tarsands', 5.973500285610109),
 ('jhiskes', 5.969587201986877),
 ('deccorals', 5.969587201986877),
 ('ue', 5.896116412284955),
 ('trewinr', 5.872854451698635),
 ('greenjobs', 5.857105614142179),
 ('rospearce', 5.757019272060145),
 ('rtcclive', 5.757019272060145),
 ('paperli', 5.702976474440348),
 ('propublica', 5.684257995432302),
 ('sustcomm', 5.684257995432302),
 ('peoplevscoal', 5.6555471686712915

In [44]:
fuzzy_sorted_pro_anti_log_odds_ratios[:50]

[('climat', 12.267923110255722),
 ('hange', 11.271826079913485),
 ('enrgy', 10.898404355987141),
 ('carbn', 10.348500391902212),
 ('arming', 9.874545755577772),
 ('eople', 9.861134738259732),
 ('climatechan', 9.821496237704082),
 ('actio', 9.778008737359297),
 ('futur', 9.226611615777788),
 ('renwable', 9.096452253465934),
 ('reort', 9.085873654668694),
 ('supprt', 8.807205345296104),
 ('indstry', 8.804274905066414),
 ('polution', 8.787704066736486),
 ('wether', 8.658588856463638),
 ('pubic', 8.576571511808739),
 ('greenewdeal', 8.560977624337001),
 ('rtdesmoguk', 8.452811150100086),
 ('impct', 8.441069789989495),
 ('politcal', 8.329640851659924),
 ('generatio', 8.182626864472995),
 ('questin', 8.103099344095229),
 ('climateation', 8.10073160723265),
 ('billon', 8.087607959629398),
 ('someting', 8.066981261748223),
 ('efficency', 8.064526400945745),
 ('mthane', 8.050915953778539),
 ('go100re', 8.049669411780211),
 ('atmosphre', 7.944551069975746),
 ('systm', 7.9064260073418),
 ('transt

In [40]:
sorted_pro_anti_log_odds_ratios[-50:]

[('irradiance', -5.933890595199233),
 ('credlin', -5.933890595199233),
 ('breitbartnews', -5.952795615652895),
 ('planethealing', -5.952795615652895),
 ('alberta411', -5.956534110926493),
 ('psiintl', -5.960258683180498),
 ('vanpoli', -5.969556770606905),
 ('climatescambs', -5.98231944022399),
 ('gh', -5.993169975737841),
 ('dmsp', -6.003904050871004),
 ('climatecult', -6.007456625638784),
 ('ir', -6.010651806261905),
 ('imager', -6.010996625507349),
 ('metop', -6.014524139188789),
 ('avhrr', -6.014524139188789),
 ('seviri', -6.014524139188789),
 ('havenr64', -6.018039254459735),
 ('mapsnorthern', -6.059284290958141),
 ('craighavenr', -6.072663109560062),
 ('ilmastonmuutos', -6.121297655861416),
 ('multisensor', -6.1244573893162455),
 ('caca', -6.136997431227124),
 ('junkscience', -6.164650521040213),
 ('adiabatic', -6.188605147382592),
 ('joannenova', -6.247553423524605),
 ('snowice', -6.251667092989312),
 ('adapt2030', -6.27906779883658),
 ('latestnews', -6.357034535301745),
 ('gotmi

In [45]:
fuzzy_sorted_pro_anti_log_odds_ratios[-50:]

[('levels', -7.80691751399),
 ('youare', -7.811587593893603),
 ('claims', -7.8309203763520205),
 ('agreat', -7.837012642822188),
 ('powers', -7.83958805564201),
 ('rnewable', -7.851085388413272),
 ('wrongi', -7.88059120171805),
 ('policya', -7.887545056452793),
 ('repor', -7.887545056452793),
 ('criss', -7.888314733906715),
 ('theory2', -7.929391724881963),
 ('temperatur', -7.941137817679738),
 ('scietific', -7.958143841771789),
 ('increse', -7.967784201426193),
 ('every1', -7.977684528447237),
 ('earths', -7.980254165279339),
 ('goings', -7.984696732325617),
 ('atmospherc', -7.993393488140591),
 ('jspry', -8.011244339861102),
 ('neverb', -8.097577921431014),
 ('tmosphere', -8.109677928228338),
 ('coulds', -8.112142012409521),
 ('lttle', -8.118581534353247),
 ('cdnoli', -8.18050972949076),
 ('youtubes', -8.195057503808066),
 ('uspol', -8.21329767816101),
 ('contro', -8.253363089487692),
 ('caused', -8.257362715667853),
 ('thatso', -8.286990643569734),
 ('belive', -8.30440647774254),
 (