In [None]:
from utils import *
import pandas as pd
import numpy as np
from quica.measures.irr import *
from quica.dataset.dataset import IRRDataset
from quica.quica import Quica
# Reproducibility: seed NumPy's legacy global random generator.
np.random.seed(0)

# Display: format floats with plain str() and widen the column-width limit
# so long text cells are shown up to 400 characters.
pd.set_option('display.float_format', str)
pd.set_option('max_colwidth', 400)


In [None]:
# Load the raw HIT 9 export, keep only rows whose internal status is
# APPROVED, and renumber the remaining rows from 0.
hit_9 = (
    pd.read_csv("../data/crowdsourced/mechanical_turk/raw/hit_9_raw.csv")
    .query("status_internal=='APPROVED'")
    .reset_index(drop=True)
)


In [None]:

# Build one answer table across questions. Each part_N is produced by
# transform_one_hot_df (from utils) from the worker id plus the option columns
# of one question; the last argument names the resulting answer column.
# NOTE(review): transform_one_hot_df is assumed to return a frame containing
# 'id_worker' and the named answer column (later cells read both) — confirm.
part_1 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question1_1_1.arg_a', 'question1_1_1.arg_b', 'question1_1_1.none'],
                     'question1_1_1_answers')

part_2 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question1_1_2.arg_a', 'question1_1_2.arg_b', 'question1_1_2.none'],
                     'question1_1_2_answers')

# NOTE(review): part_3 is an exact repeat of part_2 (same columns, same answer
# name), so it contributes nothing after de-duplication. It looks like a
# copy-paste slip for a different question — confirm which one was intended.
part_3 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question1_1_2.arg_a', 'question1_1_2.arg_b', 'question1_1_2.none'],
                     'question1_1_2_answers')

part_4 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question2_1_3.val1', 'question2_1_3.val2', 'question2_1_3.val3'],
                     'question2_1_3_answers')

part_5 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question2_2_3.val1', 'question2_2_3.val2', 'question2_2_3.val3'],
                     'question2_2_3_answers')

part_6 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question2_3_3.val1', 'question2_3_3.val2', 'question2_3_3.val3'],
                     'question2_3_3_answers')

part_7 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question2_4_3.val1', 'question2_4_3.val2', 'question2_4_3.val3'],
                     'question2_4_3_answers')

part_8 = transform_one_hot_df(hit_9,
                     ['id_worker', 'question3_3_4.val1', 'question3_3_4.val2', 'question3_3_4.val3'],
                     'question3_3_4_answers')

bias_answers = pd.concat([part_1, part_2, part_3, part_4,
                          part_5, part_6, part_7, part_8], axis=1)

# Drop repeated columns by *name*, keeping the first occurrence. The previous
# `.T.drop_duplicates().T` compared column *values*, so two different questions
# that every worker happened to answer identically would silently lose one
# column, and the double transpose turned every dtype into object.
bias_answers = bias_answers.loc[:, ~bias_answers.columns.duplicated()]
bias_answers


# === Part 2/5 (Stance Labeling) ==



## 1. Which text is more one-sided?

In [None]:
# Count the distinct values found in each option column of question3_3_4.
question1_a, question1_b, question1_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question3_3_4.val1',
                          'question3_3_4.val2',
                          'question3_3_4.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by the label shown in the chart.
q1 = {
    choice_label: transform_dict(option_counts)
    for choice_label, option_counts in (('Arg A', question1_a),
                                        ('Arg B', question1_b),
                                        ('None', question1_c))
}
print(q1)

summary = get_summary_choice(q1)
get_barchart(summary, 'Israeli military assault in Gaza',
             'Which text is more one-sided? (None)', hit_9)


In [None]:
# Pair every worker with their collapsed answer to question3_3_4.
answers = bias_answers["question3_3_4_answers"].tolist()
id_workers = bias_answers["id_worker"].tolist()
df = pd.DataFrame({'answer': answers, 'id_worker': id_workers})

# Workers whose answer was 'val1'.
# NOTE(review): the bar chart for this question is titled "(None)", while this
# filter keeps 'val1' — confirm which option counts as the successful answer.
l = df.loc[df['answer'] == 'val1', 'id_worker'].tolist()

print(f"successful workers list \n{l}")

## 2. How sure are you?

In [None]:
# Count the distinct values found in each option column of question3_4_4.
question2_a, question2_b, question2_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question3_4_4.val1',
                          'question3_4_4.val2',
                          'question3_4_4.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by certainty level.
q2 = {
    certainty_label: transform_dict(option_counts)
    for certainty_label, option_counts in (('Very sure', question2_a),
                                           ('Reasonable sure', question2_b),
                                           ('Unsure', question2_c))
}
print(q2)

summary = get_summary_choice(q2)
get_barchart(summary, 'Israeli military assault in Gaza',
             'How sure you are?', hit_9)


# === Part 3/5 ==



 
## 1. Which text has more Pro stances (paragraphs that agree with the topic)?


In [None]:
# Count the distinct values found in each option column of question2_1_3.
question1_a, question1_b, question1_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question2_1_3.val1',
                          'question2_1_3.val2',
                          'question2_1_3.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by the label shown in the chart.
q1 = {
    choice_label: transform_dict(option_counts)
    for choice_label, option_counts in (('Arg A', question1_a),
                                        ('Arg B', question1_b),
                                        ('None', question1_c))
}
print(q1)

summary = get_summary_choice(q1)
get_barchart(summary, 'Criminalization of Holocaust denial',
             'Which text has more Pro stances (None)?',
             hit_9)

## 2. Which text has more Con stances (paragraphs that disagree with the topic)?

In [None]:

# Count the distinct values found in each option column of question2_2_3.
question1_a, question1_b, question1_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question2_2_3.val1',
                          'question2_2_3.val2',
                          'question2_2_3.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by the label shown in the chart.
q1 = {
    choice_label: transform_dict(option_counts)
    for choice_label, option_counts in (('Arg A', question1_a),
                                        ('Arg B', question1_b),
                                        ('None', question1_c))
}
print(q1)

summary = get_summary_choice(q1)
get_barchart(summary, 'Infant male circumcision',
             'Which text has more Con stances (None)?', hit_9)


## 3. Which text is more one-sided?

In [None]:
# Count the distinct values found in each option column of question2_3_3.
question1_a, question1_b, question1_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question2_3_3.val1',
                          'question2_3_3.val2',
                          'question2_3_3.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by the label shown in the chart.
q1 = {
    choice_label: transform_dict(option_counts)
    for choice_label, option_counts in (('Arg A', question1_a),
                                        ('Arg B', question1_b),
                                        ('None', question1_c))
}
print(q1)

summary = get_summary_choice(q1)

# Author's note kept from the original cell: correct answer: (2, 0)
get_barchart(summary, ' Criminalization of Holocaust denial ',
             'Which text is more one-sided? (B)', hit_9)



In [None]:
# accuracy_score is imported here because the only other import of it sits in
# a much later cell; without this line the cell raises NameError on a fresh
# kernel (Restart & Run All) unless `utils` happens to export the name.
from sklearn.metrics import accuracy_score

# One row per worker with the one-hot option columns of question2_3_3.
topic1_question_3 = hit_9[['id_worker', 'question2_3_3.val1', 'question2_3_3.val2', 'question2_3_3.val3']]
topic1_question_3 = topic1_question_3.set_index('id_worker')

# Dot product of the indicator values with the column names: for every row the
# names of the selected columns are concatenated into one string.
# Assumes the option columns are 0/1 indicators with exactly one option set
# per worker — TODO confirm; a row with no option set would yield '' and make
# the split below fail.
topic1_question_3 = topic1_question_3.dot(topic1_question_3.columns).to_frame('predicted').reset_index()

# Keep only the option suffix after the dot, e.g. 'question2_3_3.val3' -> 'val3'.
topic1_question_3['predicted'] = topic1_question_3['predicted'].apply(lambda x: x.split('.')[1])

# NOTE(review): the expected answer is set to 'val3', but the bar chart for the
# same question is titled "Which text is more one-sided? (B)" — confirm which
# option is the gold label.
topic1_question_3['true_val'] = 'val3'

y_pred = topic1_question_3['predicted'].tolist()
y_true = topic1_question_3['true_val'].tolist()
print("Accuracy for topic 3: (Criminalization of Holocaust denial)", accuracy_score(y_true, y_pred))



## 4. How sure are you?

In [None]:
# Count the distinct values found in each option column of question2_4_3.
question1_a, question1_b, question1_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question2_4_3.val1',
                          'question2_4_3.val2',
                          'question2_4_3.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by certainty level.
q1 = {
    certainty_label: transform_dict(option_counts)
    for certainty_label, option_counts in (('Very sure', question1_a),
                                           ('Reasonable sure', question1_b),
                                           ('Unsure', question1_c))
}
print(q1)

summary = get_summary_choice(q1)
get_barchart(summary, 'Criminalization of Holocaust denial', 'How sure you are?', hit_9)


# === Part 4/5 ==


##  1. Which text is more one-sided? 

In [None]:
# Count the distinct values found in each option column of question1_1_2.
question1_a = hit_9[['question1_1_2.arg_a']].value_counts().to_dict()
question1_b = hit_9[['question1_1_2.arg_b']].value_counts().to_dict()
question1_c = hit_9[['question1_1_2.none']].value_counts().to_dict()

# Pass every count dict through transform_dict (utils helper) and key the
# results by the label shown in the chart.
q1 = {'A': transform_dict(question1_a),
      'B': transform_dict(question1_b),
      'None': transform_dict(question1_c)}

print(q1)
summary = get_summary_choice(q1)

# Chart title typo fixed: "onse-sided" -> "one-sided".
get_barchart(summary, 'Prohibition of downloading music and films',
             'Which text is more one-sided? (A)', hit_9)


## 2. How sure are you?

In [None]:

# Count the distinct values found in each option column of question1_2_2.
question2_a, question2_b, question2_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question1_2_2.val1',
                          'question1_2_2.val2',
                          'question1_2_2.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by certainty level.
q2 = {
    certainty_label: transform_dict(option_counts)
    for certainty_label, option_counts in (('Very sure', question2_a),
                                           ('Reasonable sure', question2_b),
                                           ('Unsure', question2_c))
}
print(q2)

summary = get_summary_choice(q2)
get_barchart(summary, 'Prohibition of downloading music and films', 'How sure you are?', hit_9)

## 3. Please, explain your decision in question 1, why do you consider the selected option is more one-sided?

In [None]:
# Show each worker's free-text entry for question1_3_2 next to their id.
hit_9.loc[:, ['id_worker', 'question1_3_2']]

# === Part 5/5 ==


##  1. Which text is more one-sided? 

In [None]:
# Count the distinct values found in each option column of question1_1_1.
question1_a = hit_9[['question1_1_1.arg_a']].value_counts().to_dict()
question1_b = hit_9[['question1_1_1.arg_b']].value_counts().to_dict()
question1_c = hit_9[['question1_1_1.none']].value_counts().to_dict()

# Pass every count dict through transform_dict (utils helper) and key the
# results by the label shown in the chart.
q1 = {'A': transform_dict(question1_a),
      'B': transform_dict(question1_b),
      'None': transform_dict(question1_c)}

print(q1)
summary = get_summary_choice(q1)

# Chart title typo fixed: "onse-sided" -> "one-sided".
get_barchart(summary, 'DC handgun ban', 'Which text is more one-sided? (A)', hit_9)


## 2. How sure are you?

In [None]:

# Count the distinct values found in each option column of question1_2_1.
question2_a, question2_b, question2_c = (
    hit_9[[option_column]].value_counts().to_dict()
    for option_column in ('question1_2_1.val1',
                          'question1_2_1.val2',
                          'question1_2_1.val3')
)

# Pass every count dict through transform_dict (utils helper) and key the
# results by certainty level.
q2 = {
    certainty_label: transform_dict(option_counts)
    for certainty_label, option_counts in (('Very sure', question2_a),
                                           ('Reasonable sure', question2_b),
                                           ('Unsure', question2_c))
}
print(q2)

summary = get_summary_choice(q2)
get_barchart(summary, 'DC handgun ban', 'How sure you are?', hit_9)

In [None]:
# Worker id plus the three option columns of question1_1_1.
data = hit_9[[
    'id_worker',
    'question1_1_1.arg_a',
    'question1_1_1.arg_b',
    'question1_1_1.none',
]]

# Reshape with the utils helper, then hand the result to Quica and show
# whatever get_results() returns.
dataframe = create_format_for_quica(data)
quica = Quica(dataframe=dataframe)
quica.get_results()

In [None]:
# Show each worker's free-text entry for question1_3_1 next to their id.
hit_9.loc[:, ['id_worker', 'question1_3_1']]

### Up to here

## Accuracy





In [None]:
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

# One row per worker, indexed by worker id, holding the one-hot option columns
# of question1_1_2.
topic1_question_1 = hit_9[
    ['id_worker', 'question1_1_2.arg_a', 'question1_1_2.arg_b', 'question1_1_2.none']
].set_index('id_worker')

# Dot product of the indicator values with the column names: per row, the
# names of the selected columns are concatenated into a single string.
# Assumes 0/1 indicators with exactly one option set per worker — TODO confirm.
topic1_question_1 = (
    topic1_question_1
    .dot(topic1_question_1.columns)
    .to_frame('predicted')
    .reset_index()
)

# Reduce 'question1_1_2.<option>' to '<option>' and attach the expected answer.
topic1_question_1 = topic1_question_1.assign(
    predicted=topic1_question_1['predicted'].apply(lambda column_name: column_name.split('.')[1]),
    true_val='arg_a',
)

y_pred1 = topic1_question_1['predicted'].tolist()
y_true1 = topic1_question_1['true_val'].tolist()
print("Accuracy for topic 1 (Prohibition of downloading music and films):             ", accuracy_score(y_true1, y_pred1))



In [None]:
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

# One row per worker, indexed by worker id, holding the one-hot option columns
# of question1_1_1.
topic1_question_1 = hit_9[
    ['id_worker', 'question1_1_1.arg_a', 'question1_1_1.arg_b', 'question1_1_1.none']
].set_index('id_worker')

# Dot product of the indicator values with the column names: per row, the
# names of the selected columns are concatenated into a single string.
# Assumes 0/1 indicators with exactly one option set per worker — TODO confirm.
topic1_question_1 = (
    topic1_question_1
    .dot(topic1_question_1.columns)
    .to_frame('predicted')
    .reset_index()
)

# Reduce 'question1_1_1.<option>' to '<option>' and attach the expected answer.
topic1_question_1 = topic1_question_1.assign(
    predicted=topic1_question_1['predicted'].apply(lambda column_name: column_name.split('.')[1]),
    true_val='arg_a',
)

y_pred1 = topic1_question_1['predicted'].tolist()
y_true1 = topic1_question_1['true_val'].tolist()
print("Accuracy for topic 2 (DC handgun ban):           ", accuracy_score(y_true1, y_pred1))



## === Section 8 ===


### Do you have any final thoughts?

In [None]:
# Show each worker's entry for question1_3_last1 next to their id.
hit_9.loc[:, ['id_worker', 'question1_3_last1']]


### What did you learn about one-sidedness?


In [None]:
# Show each worker's entry for question1_3_last2 next to their id.
hit_9.loc[:, ['id_worker', 'question1_3_last2']]

## Timing

In [None]:

# Worker id plus every elapsed-time column.
# NOTE(review): 'time_elapsed_id_4' does not follow the 'time_elapsed_<n>'
# pattern of its neighbours — confirm that this is the real column name.
timing = hit_9[[
    'id_worker',
    'time_elapsed_1', 'time_elapsed_2', 'time_elapsed_3', 'time_elapsed_id_4',
    'time_elapsed_instructions', 'time_elapsed_last',
    'time_elapsed_personal_stance', 'time_elapsed_profile',
]]

# Long format: one row per (worker, timing column) with the value in 'seconds'.
df = timing.melt(id_vars='id_worker', var_name='Timings', value_name='seconds')

# Grouped bar chart: one group per worker, one bar per timing column.
# NOTE(review): `sns` and `plt` are not imported in any visible cell —
# presumably they come from `from utils import *`; verify.
g = sns.catplot(x="id_worker", y="seconds", hue='Timings', data=df, kind='bar')

# Tick and axis label styling.
g.set_xticklabels(rotation=90)
g.set_yticklabels(size=15)
g.set_xticklabels(size=18)
g.set_xlabels(size=20)
g.set_ylabels(size=20)

# Figure size in inches (width, height).
g.fig.set_size_inches(18.27, 8.0)

# Enlarge the legend title and entries.
plt.setp(g._legend.get_title(), fontsize=30)
plt.setp(g._legend.get_texts(), fontsize=25)