# Install tortus 

In [None]:
# Run these commands in a terminal the first time you use tortus

# $ pip install tortus
# $ jupyter nbextension enable --py widgetsnbextension

# Import Data

In [2]:
import pandas as pd

# Load the second-round sample; the CSV must be in the notebook's working directory.
# skipinitialspace=True skips spaces that follow the delimiter.
sample = pd.read_csv('2nd_round_bart2.csv', skipinitialspace=True)

# Drop the first-round annotations so the new labels don't overwrite them.
train = sample.drop('label', axis=1)

# Keep only the randomly drawn cases. Take an explicit copy so that the column
# assignment below writes to an independent frame instead of a filtered
# selection (avoids pandas' SettingWithCopyWarning).
train = train.loc[train['drawn'] == 'randomly'].copy()

# Build a unique id by joining the speech id and the paragraph id with "_".
train['uniq_id'] = train['Speech_id'].astype(str) + '_' + train['par_id'].astype(str)

In [3]:
# Configure the annotation session: 50 records at a time, texts taken from the
# "text" column and each record identified by its "uniq_id".
from tortus import Tortus

tortus = Tortus(
    train,
    "text",
    num_records=50,
    random=False,
    labels=["Exclusionary", "Inclusive", "Neither"],
    id_column="uniq_id",
)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00\xa5\xaf~d\x00\…

HTML(value="<h2 style='text-align:center'>        easy text annotation in a Jupyter Notebook</h2>")

# Start annotating 

In [4]:
# Display the tortus annotation widget for the session configured above.
tortus.annotate()

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

Output()

# Store results

In [69]:
# After the annotations are complete, store them in a variable so they can be accessed later for analysis.
annotations = tortus.annotations
# Per the original author's note, this object is cumulative across annotation sessions.

# Save the annotations to disk.
annotations.to_csv("nat_bart2.csv")

In [67]:
# To continue annotating later, hand the annotations collected so far to a new
# Tortus instance through the `annotations` argument.
tortus = Tortus(
    train,
    "text",
    num_records=50,
    random=False,
    annotations=annotations,
    labels=["Exclusionary", "Inclusive", "Neither"],
    id_column="uniq_id",
)

# Then repeat the annotation step below.


In [68]:
# Display the annotation widget again for the Tortus instance created in the previous cell.
tortus.annotate()

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

Output()

# Merge annotations into the original DataFrame

In [224]:
# After finishing the annotations, attach the new labels to the original frame.
# Join on `uniq_id` instead of concatenating column-wise: pd.concat(axis=1)
# aligns rows on index labels, and `train` keeps the index of the filtered
# sample, which is not guaranteed to line up with the index of `annotations`.
# Joining on the key also avoids carrying a second, duplicate `text` column.
# annotations.rename(columns = {'label':'label2'}, inplace= True)

# One label per uniq_id, so the join cannot multiply rows of `train`.
new_labels = annotations[['uniq_id', 'label']].drop_duplicates(subset='uniq_id', keep='first')

# how='left' keeps every row of `train`; rows without an annotation get NaN.
merged = train.merge(new_labels, on='uniq_id', how='left')

# Show every row and column when a frame is displayed.
pd.set_option("display.max_rows", None, "display.max_columns", None)

# Write merged annotated data to csv

In [74]:
# Persist the merged frame. index=False keeps the row index out of the file, so
# reading it back with pd.read_csv does not add an extra "Unnamed: 0" column.
merged.to_csv("inc_exc_nat_bart2.csv", index=False)

In [81]:
# Load the frame that holds both label columns.
merged = pd.read_csv("merged.csv")

# Element-wise comparison of the two label columns.
labels_agree = merged['label'] == merged['label2']
labels_differ = merged['label'] != merged['label2']

# Number of rows with the same rating.
print(len(merged[labels_agree]))

# Widen the column display so long paragraphs are not truncated.
pd.set_option("display.max_colwidth", 2000)

# Rows where the two ratings differ, with the text and both labels.
merged.loc[labels_differ, ['text', 'label', 'label2']]

486


Unnamed: 0,text,label,label2
2,"The only """"welfare"""" in this campaign is John McCain's plan to give another $200 billion in tax cuts to the wealthiest corporations in America - including $4 billion in tax breaks to big oil companies that ran up record profits under George Bush. That's who John McCain is fighting for. But we can't afford four more years like the last eight. George Bush and John McCain are out of ideas, they are out of touch, and if you stand with me in 17 days they will be out of time.",populist,not populist
10,Their technique has been ingenious and ruthless. They have done on the infighting what they couldn't do in the open. They say they believe in public housing! But they put a declared enemy of public housing in charge of the public housing program.,not populist,populist
31,"We can do it. We can do it. Let me say this. There'll be a lot of talk about change in this election. You already heard Governor Cuomo say of all the choices you have in Michigan, only one has ever balanced a government budget, only one has ever taken on lobbyists and resisted the influence of special interests. Only one has never been a part of the Washington special interest crowd. And only one made a real good choice for vice president, Al Gore of Tennessee.",not populist,populist
38,"Now, let me divert for just a minute to say something that is very much on my mind. I spoke this to a fraternal order in Washington just last Sunday, the B'nai B'rith, and I said these words and I want to share them with you because there are some people in this country today that think that the way you settle things is just by being militant, and then they define the meaning of that term. They pick and choose the laws that they want to obey and that applies to black and white alike.",not populist,populist
40,"Those same Congressional leaders who give Senator Obama his marching orders are now saying that this mess isn't their fault and they aren't going to take any action on this crisis until after the election. Senator Obama's own advisers are saying that crisis will benefit him politically. My friends, that is the kind of me-first, country-second politics that are broken in Washington. My opponent sees an economic crisis as a political opportunity instead of a time to lead. Senator Obama isn't change, he's part of the problem with Washington.",not populist,populist
55,"Very definitely, a man who deliberately courted the most radical extremist elements in his own party and who continues this appeal in his speeches and who will be fully in their debt should he win the presidency--attacks on the courts, vilification of lour law enforcement officials, group against group and race against race. These tactics have stockpiled the ingredients for civil explosion. Now, I mentioned the truth squad. They are here and they are listening to what I have to say. Well, I will tell you something: if they will tell the truth, we will win this election hands down, going away. That is what I can tell you now.",not populist,populist
58,"Now, finally, my friends, the final and the most important thing that is before us is our own country and its future. Are we going to be a divided nation, conflict and animosity and bitterness and violence? Are we going to be a nation of people that can live together as friends and neighbors, rather than antagonists and enemies? We have one candidate in this election who is an outright racist, and appeals on racial prejudice, and he is no friend of labor. Any laboring man who can find a way to support the former governor of Alabama has little or no regard for the union movement. He has not been your friend and he has not been good for the working man. But more importantly, he preaches a doctrine of separatism, a dangerous doctrine. But at least he does it openly.",not populist,populist
183,Are my opponent and Congress really in cahoots? Look at one important question: Should we limit the terms of Congress? Governor Clinton says no. Congress says no. And I say yes.,not populist,populist
195,"How can Hillary manage this country when she can't even manage her email server? It's just one disaster after another, one scandal after another, one betrayal after another. To all Americans I say: it is time for new leadership.",not populist,populist
208,"Now, Bill Clinton can walk right through the facts and look you in the eye. He tells you there's 100,000 police on the street, that he created 11 million jobs, that he took 2 million people off welfare, that he's for school choice. He goes on and on and on and on. And somebody ought to have a meter out there to check the accuracy of all these statements.",not populist,populist


In [73]:
# Attach the new labels to `train` by uniq_id, then keep only the first row
# for each uniq_id.
label_by_id = annotations[['uniq_id', 'label']]
merged = (
    train
    .merge(label_by_id, on='uniq_id')
    .drop_duplicates(subset="uniq_id", keep="first")
)

merged

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Speech_id,par_id,text,party,term,comp,drawn,label2,uniq_id,label
0,0,0,2157,31,America's response to the Soviet Union against...,rep,1968,False,randomly,not populist,2157_31,neither
1,1,1,3321,16,While Bill Clinton has a million little plans ...,rep,1996,True,randomly,not populist,3321_16,neither
2,2,2,2008-10-18-louis-missouri-3,11,"The only """"welfare"""" in this campaign is John ...",dem,2008,False,randomly,not populist,2008-10-18-louis-missouri-3_11,neither
3,3,3,2646,12,I do promise that in the next four years the n...,rep,1980,True,randomly,not populist,2646_12,neither
4,4,4,2204,27,"So, my friends, in this whole field of peace, ...",rep,1968,False,randomly,not populist,2204_27,neither
...,...,...,...,...,...,...,...,...,...,...,...,...
495,495,495,2016-11-03-raleigh-north-carolina,29,And I think it's fair to say that my opponent ...,dem,2016,False,randomly,not populist,2016-11-03-raleigh-north-carolina_29,inclusive
496,496,496,1968,23,Tonight we have 72.5 million people working. T...,dem,1964,False,randomly,not populist,1968_23,neither
497,497,497,3110,5,"Now, today's been a wonderful day for Barbara ...",rep,1992,True,randomly,not populist,3110_5,neither
498,498,498,2137,1,"That seems to me to be the great issue, and I ...",dem,1968,True,randomly,not populist,2137_1,neither


In [168]:
# Show the rows of `all200` whose speech_par_id does not appear in `bart_annot`.
bart_annot = pd.read_csv("Annotated_Iteration_2_sample_uncertainty_bart.csv")
all200 = pd.read_csv("Iteration_2_sample_uncertainty.csv")
speech_par_id = bart_annot['speech_par_id'].tolist()
id200 = all200['speech_par_id'].tolist()

# Membership is tested against a set so each lookup is constant time instead
# of a scan over the whole list of annotated ids.
annotated_ids = set(speech_par_id)
l3 = [x for x in id200 if x not in annotated_ids]

all200.loc[all200["speech_par_id"].isin(l3)]

In [81]:
import numpy as np
# Load the cross-annotated file.
nat = pd.read_csv("inc_exc_nat_bart2.csv")

# Flag each row: 1 where the two annotators' labels are equal, 0 otherwise.
labels_match = nat['bart_label'] == nat['yuchen_label']
nat["agreement"] = np.where(labels_match, 1, 0)

# Number of rows on which the annotators disagree.
len(nat.loc[nat["agreement"] == 0])

10

In [83]:
# Save the frame with the agreement flag. `nat` was loaded with pd.read_csv, so
# its index is not written (index=False); writing it would add another
# "Unnamed: 0" column every time the file is read back.
nat.to_csv("inc_exc_bart2_crossannot_yuchen.csv", index=False)