In [1]:
import torch
import pandas as pd
import random
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoModel, AutoTokenizer, get_scheduler
from datasets import Dataset, DatasetDict
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, accuracy_score as acc, precision_score as prec, recall_score as rec
import ipdb
import re
import gc
from nrclex import NRCLex
from collections import Counter
from sklearn.metrics import f1_score, accuracy_score as acc, precision_score as prec, recall_score as rec

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(style='darkgrid', context='notebook', font_scale=1.5, rc={'figure.figsize':(14,10)})

In [2]:
## Free up memory
gc.collect()
torch.cuda.empty_cache()

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Choose gpu or cpu
if torch.cuda.is_available():
    device = torch.device('cuda:2')
else:
    device = torch.device('cpu')
    
BATCH_SIZE = 32

# Set random seeds for reproducibility on a specific machine
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.manual_seed(1)
torch.cuda.manual_seed(1)
random.seed(1)
np.random.seed(1)
np.random.RandomState(1)

print(device)

cpu


In [3]:
def replace_ent(tweet, ent):
    'Find entity name in tweet and replace with Doe' 
    
    pattern = re.compile(r"\@" + ent, re.IGNORECASE)
    return re.sub(pattern, "@USER", tweet)

In [4]:
df = pd.read_csv('../../data-annotation/maj_df_split_spec.tsv', sep='\t')

# Binarize specificity for now
df['spec'] = df['Specificity'].apply(lambda x: 1 if x>3.5 else -1)

# Positive affect and negative affect
df['affect'] = df.apply(lambda x: 1 if (x['Feeling']=='warm' or x['Behavior']=='app') else -1, axis=1)

df['tweet_clean'] = df.apply(lambda x: replace_ent(tweet=x['tweet_tokenized'], ent=x['mentname']), axis=1)

In [5]:
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,TweetId,username,mentname,Date,Predom,Feeling,Behavior,Admiration,Admiration_Int,Anger,Anger_Int,Disgust,Disgust_Int,Fear,Fear_Int,Interest,Interest_Int,Joy,Joy_Int,Sadness,Sadness_Int,Surprise,Surprise_Int,group,party,Split,tweet_tokenized,root_lemma,root_pos,conc_m,conc_sd,Specificity,spec,affect,tweet_clean
0,0,0,1001885064974790657,reprokhanna,reprichmond,2018-05-30,predom,warm,app,True,3.000000,False,2.000000,False,2.000000,False,2.0,False,2.333333,False,2.000000,False,2.0,False,2.0,1,D,train,Admire @OfficialCBC Chairman @reprichmond 's moral voice on issues of racism and restorative justice . He is a real leader for our nation and Congress .,voice,6,4.13,1.01,3.761914,1,1,Admire @OfficialCBC Chairman @USER 's moral voice on issues of racism and restorative justice . He is a real leader for our nation and Congress .
1,1,1,1002970603165536258,senatorcantwell,pattymurray,2018-06-02,predom,warm,app,False,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,True,2.333333,False,2.0,False,2.0,1,D,test,It was wonderful to celebrate the 150th anniversary of Washington ’s Pilotage Act this week with @pattymurray and this fantastic group in Port Townsend .,wonderful,2,1.82,1.12,4.046838,1,1,It was wonderful to celebrate the 150th anniversary of Washington ’s Pilotage Act this week with @USER and this fantastic group in Port Townsend .
2,2,2,1003713413292462082,pattymurray,timkaine,2018-06-04,predom,warm,app,False,2.333333,False,2.000000,False,2.000000,False,2.0,False,2.000000,True,2.333333,False,2.0,False,2.0,1,D,train,"I ’m proud to support legislation Senator @timkaine introduced to protect children if they are separated from their parents , and I ’m going to continue looking at what else we can to do end the Trump Administration 's deeply wrong policy .",proud,2,2.07,1.39,3.608603,1,1,"I ’m proud to support legislation Senator @USER introduced to protect children if they are separated from their parents , and I ’m going to continue looking at what else we can to do end the Trump Administration 's deeply wrong policy ."
3,3,3,1004419010333691904,repdelbene,speakerryan,2018-06-06,predom,cold,disapp,False,2.000000,True,2.000000,False,2.333333,False,2.0,False,2.000000,False,2.000000,False,2.0,False,2.0,-1,D,train,"During #ImmigrantHeritageMonth , let ’s remember the numerous contributions those “ yearning to breathe free ” have made to our nation , and do the people ’s work by voting on bipartisan immigration reform measures . No more excuses , @speakerryan . #CelebrateImmigrants !",let,3,2.26,1.20,3.972930,1,-1,"During #ImmigrantHeritageMonth , let ’s remember the numerous contributions those “ yearning to breathe free ” have made to our nation , and do the people ’s work by voting on bipartisan immigration reform measures . No more excuses , @USER . #CelebrateImmigrants !"
4,4,4,1004433631950065664,senbobcasey,senatorleahy,2018-06-06,predom,warm,app,True,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,True,1.666667,False,2.0,False,2.0,1,D,train,Thank you @senatorleahy . The Administration is choosing to implement an inhuman policy that separates migrant children from their parents . It must end now !,thank,0,3.00,1.60,3.366325,-1,1,Thank you @USER . The Administration is choosing to implement an inhuman policy that separates migrant children from their parents . It must end now !
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3028,3028,3028,999245047202811904,repspeier,stevekingia,2018-05-23,predom,cold,disapp,False,2.000000,True,2.333333,True,2.333333,False,2.0,False,2.000000,False,2.000000,False,2.0,False,2.0,-1,D,train,. @stevekingia should keep his eyes and “ ears ” on the corn in Iowa and leave Mayor Schaaf to continue ably serving her Oakland constituents .,keep,3,2.37,1.40,3.766546,1,-1,. @USER should keep his eyes and “ ears ” on the corn in Iowa and leave Mayor Schaaf to continue ably serving her Oakland constituents .
3029,3029,3029,999289718683721728,speakerryan,gopleader,2018-05-23,predom,neutral,dunno,False,2.000000,False,2.000000,False,2.000000,False,2.0,True,2.333333,False,2.000000,False,2.0,False,2.0,1,R,train,"We ’ve lost 630,000 Americans to overdoses since 2000 . We 're in the midst of an #OpioidEpidemic . The House will soon consider more than 60 bills to combat this epidemic because we ca n’t let opioids continue to ravage our communities . More from @gopleader :",midst,14,2.33,1.24,3.766662,1,-1,"We ’ve lost 630,000 Americans to overdoses since 2000 . We 're in the midst of an #OpioidEpidemic . The House will soon consider more than 60 bills to combat this epidemic because we ca n’t let opioids continue to ravage our communities . More from @USER :"
3030,3030,3030,999329645316247552,kencalvert,gracenapolitano,2018-05-23,predom,warm,app,False,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,False,2.000000,False,2.0,False,2.0,-1,R,train,The House approved an amendment to the National Defense Authorization Act that I sponsored along with @gracenapolitano supporting the National Guard Youth Challenge program . Read more here :,approve,2,1.85,1.20,4.258683,1,1,The House approved an amendment to the National Defense Authorization Act that I sponsored along with @USER supporting the National Guard Youth Challenge program . Read more here :
3031,3031,3031,999381351634743299,senatorenzi,mikekellypa,2018-05-23,predom,warm,app,False,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,False,1.666667,False,2.0,False,2.0,1,R,test,Led a letter to President Trump with @mikekellypa and 78 other members to ensure the government protects faith - based organizations that provide child welfare services from losing federal funding because of their religious faith .,lead,0,4.10,1.23,4.266755,1,1,Led a letter to President Trump with @USER and 78 other members to ensure the government protects faith - based organizations that provide child welfare services from losing federal funding because of their religious faith .


In [6]:
df['emot'] = df['tweet_tokenized'].apply(lambda x: NRCLex(x).affect_frequencies)

In [7]:
emots = ['fear', 'anger', 'trust', 'surprise', 'sadness', 'disgust', 'joy', 'anticip']
emolex_to_plutchik = {'fear': 'e_Fear', 'anger': 'e_Anger', 'trust': 'e_Admiration', 'surprise': 'e_Surprise',
                    'sadness': 'e_Sadness', 'disgust': 'e_Disgust', 'joy': 'e_Joy', 'anticip': 'e_Interest'}

df['emots_only'] = df['emot'].apply(lambda x: {emolex_to_plutchik[a]: x[a] for a in x.keys() if a in emots})

df['top_emot'] = df['emots_only'].apply(lambda x: max(list(x.items()), key=lambda x: x[1])[0])

for e in emolex_to_plutchik.values():
    df.loc[:, e] = df['emots_only'].apply(lambda x: True if x[e]>0.001 else False)
    
def emo2sent(x):
    if abs(x['positive'] - x['negative']) < 0.01:
        return 'NEUTRAL'
    elif x['positive'] > x['negative']:
        return 'POSITIVE'
    else:
        return 'NEGATIVE'
    
df['emolex_senti'] = df['emot'].apply(lambda x: emo2sent(x))

In [8]:
# x_labels = ['All', 'In-group', 'Out-group']
emot_ans = ['Admiration', 'Anger', 'Disgust', 'Fear', 'Interest', 'Joy', 'Sadness', 'Surprise']

# maj_emots_all = df.loc[:, emot_ans].astype(int)
# emot_dist_all = maj_emots_all.sum().to_dict()
# data_all = [emot_dist_all[a] for a in emot_ans]


# maj_emots_in = df[df['group']==1].loc[:, emot_ans].astype(int)
# emot_dist_in = maj_emots_in.sum().to_dict()
# data_in = [emot_dist_in[a] for a in emot_ans]

# maj_emots_out = df[df['group']==-1].loc[:, emot_ans].astype(int)
# emot_dist_out = maj_emots_out.sum().to_dict()
# data_out = [emot_dist_out[a] for a in emot_ans]

# emprop = pd.DataFrame({'All': data_all, 'In-group': data_in, 'Out-group': data_out}).T
# emprop.columns = emot_ans
# emprop=emprop.div(emprop.sum(axis=1), axis=0)

# # create stacked bar chart
# fig=emprop.plot(kind='bar', stacked=True)
# fig.grid(axis='x')
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# # labels for x & y axis
# # plt.xlabel('Type')
# plt.ylabel('Proportion')
 
# # title of plot
# plt.title('Proportion of emotions for all annotations')
# plt.show()

In [9]:
# x_labels = ['All', 'In-group', 'Out-group']
emot_ans2 = ['e_Admiration', 'e_Anger', 'e_Disgust', 'e_Fear', 'e_Interest', 'e_Joy', 'e_Sadness', 'e_Surprise']

# maj_emots_all = df.loc[:, emot_ans2].astype(int)
# emot_dist_all = maj_emots_all.sum().to_dict()
# data_all = [emot_dist_all[a] for a in emot_ans2]


# maj_emots_in = df[df['group']==1].loc[:, emot_ans2].astype(int)
# emot_dist_in = maj_emots_in.sum().to_dict()
# data_in = [emot_dist_in[a] for a in emot_ans2]

# maj_emots_out = df[df['group']==-1].loc[:, emot_ans2].astype(int)
# emot_dist_out = maj_emots_out.sum().to_dict()
# data_out = [emot_dist_out[a] for a in emot_ans2]

# emprop = pd.DataFrame({'All': data_all, 'In-group': data_in, 'Out-group': data_out}).T
# emprop.columns = emot_ans2
# emprop=emprop.div(emprop.sum(axis=1), axis=0)

# # create stacked bar chart
# fig=emprop.plot(kind='bar', stacked=True)
# fig.grid(axis='x')
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# # labels for x & y axis
# # plt.xlabel('Type')
# plt.ylabel('Proportion')
 
# # title of plot
# plt.title('Proportion of emotions for all using EMOLEX')
# plt.show()

In [10]:
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,TweetId,username,mentname,Date,Predom,Feeling,Behavior,Admiration,Admiration_Int,Anger,Anger_Int,Disgust,Disgust_Int,Fear,Fear_Int,Interest,Interest_Int,Joy,Joy_Int,Sadness,Sadness_Int,Surprise,Surprise_Int,group,party,Split,tweet_tokenized,root_lemma,root_pos,conc_m,conc_sd,Specificity,spec,affect,tweet_clean,emot,emots_only,top_emot,e_Fear,e_Anger,e_Admiration,e_Surprise,e_Sadness,e_Disgust,e_Joy,e_Interest,emolex_senti
0,0,0,1001885064974790657,reprokhanna,reprichmond,2018-05-30,predom,warm,app,True,3.000000,False,2.000000,False,2.000000,False,2.0,False,2.333333,False,2.000000,False,2.0,False,2.0,1,D,train,Admire @OfficialCBC Chairman @reprichmond 's moral voice on issues of racism and restorative justice . He is a real leader for our nation and Congress .,voice,6,4.13,1.01,3.761914,1,1,Admire @OfficialCBC Chairman @USER 's moral voice on issues of racism and restorative justice . He is a real leader for our nation and Congress .,"{'fear': 0.0, 'anger': 0.07142857142857142, 'anticip': 0.0, 'trust': 0.42857142857142855, 'surprise': 0.0, 'positive': 0.35714285714285715, 'negative': 0.0, 'sadness': 0.0, 'disgust': 0.0, 'joy': 0.07142857142857142, 'anticipation': 0.07142857142857142}","{'e_Fear': 0.0, 'e_Anger': 0.07142857142857142, 'e_Interest': 0.0, 'e_Admiration': 0.42857142857142855, 'e_Surprise': 0.0, 'e_Sadness': 0.0, 'e_Disgust': 0.0, 'e_Joy': 0.07142857142857142}",e_Admiration,False,True,True,False,False,False,True,False,POSITIVE
1,1,1,1002970603165536258,senatorcantwell,pattymurray,2018-06-02,predom,warm,app,False,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,True,2.333333,False,2.0,False,2.0,1,D,test,It was wonderful to celebrate the 150th anniversary of Washington ’s Pilotage Act this week with @pattymurray and this fantastic group in Port Townsend .,wonderful,2,1.82,1.12,4.046838,1,1,It was wonderful to celebrate the 150th anniversary of Washington ’s Pilotage Act this week with @USER and this fantastic group in Port Townsend .,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 'trust': 0.25, 'surprise': 0.25, 'positive': 0.25, 'negative': 0.0, 'sadness': 0.0, 'disgust': 0.0, 'joy': 0.25}","{'e_Fear': 0.0, 'e_Anger': 0.0, 'e_Interest': 0.0, 'e_Admiration': 0.25, 'e_Surprise': 0.25, 'e_Sadness': 0.0, 'e_Disgust': 0.0, 'e_Joy': 0.25}",e_Admiration,False,False,True,True,False,False,True,False,POSITIVE
2,2,2,1003713413292462082,pattymurray,timkaine,2018-06-04,predom,warm,app,False,2.333333,False,2.000000,False,2.000000,False,2.0,False,2.000000,True,2.333333,False,2.0,False,2.0,1,D,train,"I ’m proud to support legislation Senator @timkaine introduced to protect children if they are separated from their parents , and I ’m going to continue looking at what else we can to do end the Trump Administration 's deeply wrong policy .",proud,2,2.07,1.39,3.608603,1,1,"I ’m proud to support legislation Senator @USER introduced to protect children if they are separated from their parents , and I ’m going to continue looking at what else we can to do end the Trump Administration 's deeply wrong policy .","{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 'trust': 0.3, 'surprise': 0.0, 'positive': 0.3, 'negative': 0.1, 'sadness': 0.0, 'disgust': 0.0, 'joy': 0.1, 'anticipation': 0.2}","{'e_Fear': 0.0, 'e_Anger': 0.0, 'e_Interest': 0.0, 'e_Admiration': 0.3, 'e_Surprise': 0.0, 'e_Sadness': 0.0, 'e_Disgust': 0.0, 'e_Joy': 0.1}",e_Admiration,False,False,True,False,False,False,True,False,POSITIVE
3,3,3,1004419010333691904,repdelbene,speakerryan,2018-06-06,predom,cold,disapp,False,2.000000,True,2.000000,False,2.333333,False,2.0,False,2.000000,False,2.000000,False,2.0,False,2.0,-1,D,train,"During #ImmigrantHeritageMonth , let ’s remember the numerous contributions those “ yearning to breathe free ” have made to our nation , and do the people ’s work by voting on bipartisan immigration reform measures . No more excuses , @speakerryan . #CelebrateImmigrants !",let,3,2.26,1.20,3.972930,1,-1,"During #ImmigrantHeritageMonth , let ’s remember the numerous contributions those “ yearning to breathe free ” have made to our nation , and do the people ’s work by voting on bipartisan immigration reform measures . No more excuses , @USER . #CelebrateImmigrants !","{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 'trust': 0.2857142857142857, 'surprise': 0.0, 'positive': 0.2857142857142857, 'negative': 0.14285714285714285, 'sadness': 0.0, 'disgust': 0.0, 'joy': 0.14285714285714285, 'anticipation': 0.14285714285714285}","{'e_Fear': 0.0, 'e_Anger': 0.0, 'e_Interest': 0.0, 'e_Admiration': 0.2857142857142857, 'e_Surprise': 0.0, 'e_Sadness': 0.0, 'e_Disgust': 0.0, 'e_Joy': 0.14285714285714285}",e_Admiration,False,False,True,False,False,False,True,False,POSITIVE
4,4,4,1004433631950065664,senbobcasey,senatorleahy,2018-06-06,predom,warm,app,True,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,True,1.666667,False,2.0,False,2.0,1,D,train,Thank you @senatorleahy . The Administration is choosing to implement an inhuman policy that separates migrant children from their parents . It must end now !,thank,0,3.00,1.60,3.366325,-1,1,Thank you @USER . The Administration is choosing to implement an inhuman policy that separates migrant children from their parents . It must end now !,"{'fear': 0.16666666666666666, 'anger': 0.16666666666666666, 'anticip': 0.0, 'trust': 0.16666666666666666, 'surprise': 0.0, 'positive': 0.0, 'negative': 0.16666666666666666, 'sadness': 0.16666666666666666, 'disgust': 0.16666666666666666, 'joy': 0.0}","{'e_Fear': 0.16666666666666666, 'e_Anger': 0.16666666666666666, 'e_Interest': 0.0, 'e_Admiration': 0.16666666666666666, 'e_Surprise': 0.0, 'e_Sadness': 0.16666666666666666, 'e_Disgust': 0.16666666666666666, 'e_Joy': 0.0}",e_Fear,True,True,True,False,True,True,False,False,NEGATIVE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3028,3028,3028,999245047202811904,repspeier,stevekingia,2018-05-23,predom,cold,disapp,False,2.000000,True,2.333333,True,2.333333,False,2.0,False,2.000000,False,2.000000,False,2.0,False,2.0,-1,D,train,. @stevekingia should keep his eyes and “ ears ” on the corn in Iowa and leave Mayor Schaaf to continue ably serving her Oakland constituents .,keep,3,2.37,1.40,3.766546,1,-1,. @USER should keep his eyes and “ ears ” on the corn in Iowa and leave Mayor Schaaf to continue ably serving her Oakland constituents .,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 'trust': 0.16666666666666666, 'surprise': 0.16666666666666666, 'positive': 0.16666666666666666, 'negative': 0.16666666666666666, 'sadness': 0.16666666666666666, 'disgust': 0.0, 'joy': 0.0, 'anticipation': 0.16666666666666666}","{'e_Fear': 0.0, 'e_Anger': 0.0, 'e_Interest': 0.0, 'e_Admiration': 0.16666666666666666, 'e_Surprise': 0.16666666666666666, 'e_Sadness': 0.16666666666666666, 'e_Disgust': 0.0, 'e_Joy': 0.0}",e_Admiration,False,False,True,True,True,False,False,False,NEUTRAL
3029,3029,3029,999289718683721728,speakerryan,gopleader,2018-05-23,predom,neutral,dunno,False,2.000000,False,2.000000,False,2.000000,False,2.0,True,2.333333,False,2.000000,False,2.0,False,2.0,1,R,train,"We ’ve lost 630,000 Americans to overdoses since 2000 . We 're in the midst of an #OpioidEpidemic . The House will soon consider more than 60 bills to combat this epidemic because we ca n’t let opioids continue to ravage our communities . More from @gopleader :",midst,14,2.33,1.24,3.766662,1,-1,"We ’ve lost 630,000 Americans to overdoses since 2000 . We 're in the midst of an #OpioidEpidemic . The House will soon consider more than 60 bills to combat this epidemic because we ca n’t let opioids continue to ravage our communities . More from @USER :","{'fear': 0.13333333333333333, 'anger': 0.13333333333333333, 'anticip': 0.0, 'trust': 0.06666666666666667, 'surprise': 0.06666666666666667, 'positive': 0.06666666666666667, 'negative': 0.2, 'sadness': 0.13333333333333333, 'disgust': 0.06666666666666667, 'joy': 0.0, 'anticipation': 0.13333333333333333}","{'e_Fear': 0.13333333333333333, 'e_Anger': 0.13333333333333333, 'e_Interest': 0.0, 'e_Admiration': 0.06666666666666667, 'e_Surprise': 0.06666666666666667, 'e_Sadness': 0.13333333333333333, 'e_Disgust': 0.06666666666666667, 'e_Joy': 0.0}",e_Fear,True,True,True,True,True,True,False,False,NEGATIVE
3030,3030,3030,999329645316247552,kencalvert,gracenapolitano,2018-05-23,predom,warm,app,False,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,False,2.000000,False,2.0,False,2.0,-1,R,train,The House approved an amendment to the National Defense Authorization Act that I sponsored along with @gracenapolitano supporting the National Guard Youth Challenge program . Read more here :,approve,2,1.85,1.20,4.258683,1,1,The House approved an amendment to the National Defense Authorization Act that I sponsored along with @USER supporting the National Guard Youth Challenge program . Read more here :,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 'trust': 0.5, 'surprise': 0.0, 'positive': 0.5, 'negative': 0.0, 'sadness': 0.0, 'disgust': 0.0, 'joy': 0.0}","{'e_Fear': 0.0, 'e_Anger': 0.0, 'e_Interest': 0.0, 'e_Admiration': 0.5, 'e_Surprise': 0.0, 'e_Sadness': 0.0, 'e_Disgust': 0.0, 'e_Joy': 0.0}",e_Admiration,False,False,True,False,False,False,False,False,POSITIVE
3031,3031,3031,999381351634743299,senatorenzi,mikekellypa,2018-05-23,predom,warm,app,False,2.000000,False,2.000000,False,2.000000,False,2.0,False,2.000000,False,1.666667,False,2.0,False,2.0,1,R,test,Led a letter to President Trump with @mikekellypa and 78 other members to ensure the government protects faith - based organizations that provide child welfare services from losing federal funding because of their religious faith .,lead,0,4.10,1.23,4.266755,1,1,Led a letter to President Trump with @USER and 78 other members to ensure the government protects faith - based organizations that provide child welfare services from losing federal funding because of their religious faith .,"{'fear': 0.05263157894736842, 'anger': 0.05263157894736842, 'anticip': 0.0, 'trust': 0.15789473684210525, 'surprise': 0.0, 'positive': 0.21052631578947367, 'negative': 0.10526315789473684, 'sadness': 0.05263157894736842, 'disgust': 0.0, 'joy': 0.15789473684210525, 'anticipation': 0.21052631578947367}","{'e_Fear': 0.05263157894736842, 'e_Anger': 0.05263157894736842, 'e_Interest': 0.0, 'e_Admiration': 0.15789473684210525, 'e_Surprise': 0.0, 'e_Sadness': 0.05263157894736842, 'e_Disgust': 0.0, 'e_Joy': 0.15789473684210525}",e_Admiration,True,True,True,False,True,False,True,False,POSITIVE


In [11]:
# Counter(df['Behavior'].values)

In [12]:
# x_labels = ['All', 'In-group', 'Out-group']
# feelings = ['warm', 'cold', 'neutral', 'dunno', 'mixed']

# feeling_all_dict = Counter(df.loc[:, 'Feeling'].values)
# data_all = [feeling_all_dict[a] for a in feelings]


# feeling_in_dict = Counter(df[df['group']==1].loc[:, 'Feeling'].values)
# data_in = [feeling_in_dict[a] for a in feelings]

# feeling_out_dict = Counter(df[df['group']==-1].loc[:, 'Feeling'].values)
# data_out = [feeling_out_dict[a] for a in feelings]

# emprop = pd.DataFrame({'All': data_all, 'In-group': data_in, 'Out-group': data_out}).T
# emprop.columns = feelings
# emprop=emprop.div(emprop.sum(axis=1), axis=0)

# # create stacked bar chart
# fig=emprop.plot(kind='bar', stacked=True)
# fig.grid(axis='x')
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# # labels for x & y axis
# # plt.xlabel('Type')
# plt.ylabel('Proportion')
 
# # title of plot
# plt.title('Feeling for all Annotations')
# plt.show()

In [13]:
# x_labels = ['All', 'In-group', 'Out-group']
# behs = ['app', 'disapp', 'neutral', 'dunno', 'mixed']

# beh_all_dict = Counter(df.loc[:, 'Behavior'].values)
# data_all = [beh_all_dict[a] for a in behs]


# beh_in_dict = Counter(df[df['group']==1].loc[:, 'Behavior'].values)
# data_in = [beh_in_dict[a] for a in behs]

# beh_out_dict = Counter(df[df['group']==-1].loc[:, 'Behavior'].values)
# data_out = [beh_out_dict[a] for a in behs]

# emprop = pd.DataFrame({'All': data_all, 'In-group': data_in, 'Out-group': data_out}).T
# emprop.columns = behs
# emprop=emprop.div(emprop.sum(axis=1), axis=0)

# # create stacked bar chart
# fig=emprop.plot(kind='bar', stacked=True)
# fig.grid(axis='x')
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# # labels for x & y axis
# # plt.xlabel('Type')
# plt.ylabel('Proportion')
 
# # title of plot
# plt.title('Approval for all Annotations')
# plt.show()

In [14]:
# x_labels = ['All', 'In-group', 'Out-group']
# sentis = ['POSITIVE', 'NEGATIVE', 'NEUTRAL']

# senti_all_dict = Counter(df.loc[:, 'emolex_senti'].values)
# data_all = [senti_all_dict[a] for a in sentis]


# senti_in_dict = Counter(df[df['group']==1].loc[:, 'emolex_senti'].values)
# data_in = [senti_in_dict[a] for a in sentis]

# senti_out_dict = Counter(df[df['group']==-1].loc[:, 'emolex_senti'].values)
# data_out = [senti_out_dict[a] for a in sentis]

# emprop = pd.DataFrame({'All': data_all, 'In-group': data_in, 'Out-group': data_out}).T
# emprop.columns = sentis
# emprop=emprop.div(emprop.sum(axis=1), axis=0)

# # create stacked bar chart
# fig=emprop.plot(kind='bar', stacked=True)
# fig.grid(axis='x')
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# # labels for x & y axis
# # plt.xlabel('Type')
# plt.ylabel('Proportion')
 
# # title of plot
# plt.title('Proportion of emotions for all')
# plt.show()

In [15]:
dev_df = df[df['Split']=='dev']

In [16]:
emolex_preds=dev_df.loc[:, emot_ans2].astype(int).values
true_preds=dev_df.loc[:, emot_ans].astype(int).values
f1_score(true_preds, emolex_preds, average='micro'), prec(true_preds, emolex_preds, average='micro'), rec(true_preds, emolex_preds, average='micro')

(0.2769556025369979, 0.21510673234811165, 0.3887240356083086)

In [17]:
indiv_f1s = f1_score(true_preds, emolex_preds, average=None, zero_division=0)
print({emot_ans[i]: np.round(indiv_f1s[i], 3) for i in range(len(emot_ans))})

{'Admiration': 0.411, 'Anger': 0.266, 'Disgust': 0.32, 'Fear': 0.0, 'Interest': 0.0, 'Joy': 0.486, 'Sadness': 0.103, 'Surprise': 0.0}


In [18]:
test_df = df[df['Split']=='test']

emolex_preds=test_df.loc[:, emot_ans2].astype(int).values
true_preds=test_df.loc[:, emot_ans].astype(int).values
print(np.round(f1_score(true_preds, emolex_preds, average='micro'),3))

emolex_predsin=test_df[test_df['group']==1].loc[:, emot_ans2].astype(int).values
true_predsin=test_df[test_df['group']==1].loc[:, emot_ans].astype(int).values

emolex_predsout=test_df[test_df['group']==-1].loc[:, emot_ans2].astype(int).values
true_predsout=test_df[test_df['group']==-1].loc[:, emot_ans].astype(int).values

print("In group F1: ", np.round(f1_score(true_predsin, emolex_predsin, average='micro'), 3))
print("Out group F1: ", np.round(f1_score(true_predsout, emolex_predsout, average='micro'), 3))

indiv_f1s = f1_score(true_preds, emolex_preds, average=None, zero_division=0)
print({emot_ans[i]: np.round(indiv_f1s[i], 3) for i in range(len(emot_ans))})

0.248
In group F1:  0.24
Out group F1:  0.254
{'Admiration': 0.375, 'Anger': 0.266, 'Disgust': 0.225, 'Fear': 0.0, 'Interest': 0.0, 'Joy': 0.484, 'Sadness': 0.043, 'Surprise': 0.0}


In [20]:
true_zero = [np.all(x==0) for x in true_preds]
pred_zero = [np.all(x==0) for x in emolex_preds]
null_f1 = f1_score(true_zero, pred_zero)
print("\nNull emotions F1 score:", np.round(null_f1, 3))


Null emotions F1 score: 0.222


In [24]:
Counter(true_zero)

Counter({False: 262, True: 63})

In [23]:
Counter(pred_zero)

Counter({False: 235, True: 90})