# Install tortus 

In [None]:
# Run these commands in the terminal when using tortus for the first time

# $ pip install tortus
# $ jupyter nbextension enable --py widgetsnbextension

# Import Data

In [23]:
import pandas as pd

# The CSV file is expected to sit in the same directory as this notebook.
sample = pd.read_csv('annotated_par_bart_2.csv', skipinitialspace=True)

# Build the frame to annotate in a single chain:
#   1. drop the first-round `label` column so it is not overwritten,
#   2. keep only the rows whose `drawn` value is 'randomly',
#   3. add `uniq_id` = "<Speech_id>_<par_id>" as a combined speech/paragraph key.
train = (
    sample
    .drop(columns='label')
    .loc[lambda df: df['drawn'] == 'randomly']
    .assign(uniq_id=lambda df: df['Speech_id'].astype(str) + '_' + df['par_id'].astype(str))
)

In [24]:
# Configure the annotation session: 50 records at a time from the "text"
# column of `train`, without random sampling, keyed by `uniq_id`.
from tortus import Tortus

tortus = Tortus(
    train,
    "text",
    num_records=50,
    random=False,
    labels=['populist', 'not populist'],
    id_column="uniq_id",
)

# Start annotating 

In [25]:
# Show the tortus annotation widget for the session configured above.
tortus.annotate()

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

Output()

# Store results

In [37]:
# After annotating, keep the results in a variable so they can be accessed later for analysis.
annotations = tortus.annotations
# Per the original author's note, this object is cumulative across annotation rounds.

# Save the annotations to a CSV file in the working directory.
annotations.to_csv("2nd_round_bart2.csv")

In [32]:
# To continue annotating later, pass the existing annotations dataframe to a
# new Tortus instance.
# The labels and id_column must match the ones used when `annotations` was
# first collected (lower-case labels, `uniq_id` key). `par_id` alone is only
# a paragraph number within a speech, so it cannot identify a record, and
# differently-cased labels would split one class into two spellings.
tortus = Tortus(
    train,
    "text",
    num_records=50,
    random=False,
    annotations=annotations,
    labels=['populist', 'not populist'],
    id_column="uniq_id",
)

# and then repeat the annotating process


In [33]:
# Show the tortus annotation widget again to continue with the next batch of records.
tortus.annotate()

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

Output()

# Merge annotations into the original DataFrame

In [21]:
# After finishing the annotations, attach the second-round labels to the original df.

# Rename on a new frame instead of in place: `annotations` was taken from
# `tortus.annotations`, so an in-place rename could alter the frame tortus is
# still using, and it would make this cell behave differently on a re-run.
annotations_round2 = annotations.rename(columns={'label': 'label2'})

# Join on the speech/paragraph key rather than concatenating column-wise.
# pd.concat(axis=1) aligns rows by index label, and the index of
# `annotations` is not guaranteed to line up with the index of `sample`,
# which can silently attach labels to the wrong paragraphs.
# NOTE(review): assumes `uniq_id` is unique within `annotations`; duplicated
# keys would duplicate rows in the result.
sample_key = sample['Speech_id'].astype(str) + '_' + sample['par_id'].astype(str)
merged = (
    sample
    .assign(uniq_id=sample_key)
    .merge(annotations_round2[['uniq_id', 'label2']], how='left', on='uniq_id')
    .drop(columns='uniq_id')  # keep the original columns plus `label2` only
)

# Preview the annotation frame joined onto the rows that were sent for annotation.
pd.merge(train, annotations_round2, how='left', on="uniq_id")

Unnamed: 0.1,Unnamed: 0,Speech_id,par_id,text,party,term,comp,label,label2
0,0,2157,31,America's response to the Soviet Union against...,rep,1968,False,not populist,not populist
1,1,3321,16,While Bill Clinton has a million little plans ...,rep,1996,True,not populist,not populist
2,2,2008-10-18-louis-missouri-3,11,"The only """"welfare"""" in this campaign is John ...",dem,2008,False,populist,not populist
3,3,2646,12,I do promise that in the next four years the n...,rep,1980,True,not populist,not populist
4,4,2204,27,"So, my friends, in this whole field of peace, ...",rep,1968,False,not populist,
...,...,...,...,...,...,...,...,...,...
495,495,2016-11-03-raleigh-north-carolina,29,And I think it's fair to say that my opponent ...,dem,2016,False,not populist,
496,496,1968,23,Tonight we have 72.5 million people working. T...,dem,1964,False,not populist,
497,497,3110,5,"Now, today's been a wonderful day for Barbara ...",rep,1992,True,not populist,
498,498,2137,1,"That seems to me to be the great issue, and I ...",dem,1968,True,not populist,


# Write merged annotated data to csv

In [47]:
# Write the merged frame to a CSV file in the working directory.
# NOTE(review): the row index is written as well (pandas default); on re-read it
# appears as an "Unnamed: 0" column — consider index=False if that is unwanted.
merged.to_csv("2nd_round_yuchen1.csv")

In [41]:
# Preview the first 50 rows of `train` left-joined with the annotations on `uniq_id`.
train.merge(annotations, how='left', on='uniq_id').iloc[:50]

Unnamed: 0.1,Unnamed: 0,Speech_id,par_id,text_x,party,term,comp,drawn,uniq_id,text_y,label,annotated_at
0,0,2157,31,America's response to the Soviet Union against...,rep,1968,False,randomly,2157_31,America's response to the Soviet Union against...,not populist,2020-12-18 18:08:21
1,1,3321,16,While Bill Clinton has a million little plans ...,rep,1996,True,randomly,3321_16,While Bill Clinton has a million little plans ...,not populist,2020-12-18 18:17:18
2,2,2008-10-18-louis-missouri-3,11,"The only """"welfare"""" in this campaign is John ...",dem,2008,False,randomly,2008-10-18-louis-missouri-3_11,"The only """"welfare"""" in this campaign is John ...",not populist,2020-12-18 18:21:42
3,3,2646,12,I do promise that in the next four years the n...,rep,1980,True,randomly,2646_12,I do promise that in the next four years the n...,not populist,2020-12-18 18:22:18
4,4,2204,27,"So, my friends, in this whole field of peace, ...",rep,1968,False,randomly,2204_27,"So, my friends, in this whole field of peace, ...",not populist,2020-12-18 18:22:44
5,5,2887,18,We also have one of the only state plant closi...,dem,1988,False,randomly,2887_18,We also have one of the only state plant closi...,not populist,2020-12-18 18:22:55
6,6,2785,13,We could put a price tag on the value of these...,rep,1984,False,randomly,2785_13,We could put a price tag on the value of these...,not populist,2020-12-18 18:23:45
7,7,1630,15,And I want to make it clear that if the Democr...,dem,1960,True,randomly,1630_15,And I want to make it clear that if the Democr...,not populist,2020-12-18 18:25:37
8,8,3367,13,It has dropped in half since the second quarte...,rep,1996,False,randomly,3367_13,It has dropped in half since the second quarte...,not populist,2020-12-18 18:25:59
9,9,2012-11-05-rally-madison-wisconsin,39,"So, Wisconsin, we know what change is. We know...",dem,2012,False,randomly,2012-11-05-rally-madison-wisconsin_39,"So, Wisconsin, we know what change is. We know...",not populist,2020-12-18 18:27:00


In [39]:
# NOTE(review): leftover IPython help lookup for pd.merge; remove before sharing the notebook.
?pd.merge