In [99]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.pyplot import *
%matplotlib inline
 
# Ask the inline backend to emit figures in PDF format.
# NOTE(review): newer IPython releases deprecate this helper in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats — confirm against
# the installed IPython version before upgrading.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf')

In [100]:
# Read the two CSV inputs used by the rest of the notebook. Paths are
# relative to the directory the kernel was started in.
agg_data, full_data = (
    pd.read_csv(csv_path) for csv_path in ('aggregated.csv', 'full_report.csv')
)

### Confidence

In [101]:
# Summary statistics of the confidence column (kept as a one-column frame so
# the result renders as a table).
agg_data.loc[:, ['assigned_class:confidence']].describe()

Unnamed: 0,assigned_class:confidence
count,251.0
mean,0.86168
std,0.17678
min,0.3554
25%,0.67485
50%,1.0
75%,1.0
max,1.0


In [102]:
# Histogram of the confidence column with one bar per 0.1-wide bin; each bar
# is coloured according to where its centre lies on the confidence axis.
# plt.get_cmap is used because plt.cm.get_cmap was removed in matplotlib 3.9.
color_map = plt.get_cmap('tab10')
ticks = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

fig, ax = plt.subplots()
ax.set_title('Confidence of Labeling')
ax.set_xlabel('Confidence')
ax.set_ylabel('Count')
ax.grid(axis='y', linestyle='dashed')
ax.set_xticks(ticks)

# The tick positions double as bin edges, so every bar sits between two ticks.
# NOTE(review): values outside [0.3, 1] would be silently left out of the
# histogram — the describe() output above suggests none exist; confirm if the
# input data changes.
n, bins, patches = ax.hist(agg_data['assigned_class:confidence'], bins=ticks)
bin_centers = 0.5 * (bins[:-1] + bins[1:])

# scale values to interval [0,1]
col = bin_centers - bin_centers.min()
col /= col.max()

# No base colour is passed to hist(): every patch is recoloured here anyway.
for c, p in zip(col, patches):
    p.set_facecolor(color_map(c))
plt.show()

<matplotlib.figure.Figure at 0x1a1c2cf860>

#### Least confident sentences

In [103]:
# Colour map consumed by the background gradient of the styled table in a
# later cell.
color_map = sns.light_palette('blue', as_cmap=True)
# Do not truncate long cell contents. None is the supported "no limit" value
# (pandas >= 1.0); the legacy -1 sentinel is rejected by pandas >= 2.0.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.expand_frame_repr', True)
print('Least confident sentences (confidence <= 0.5)')
# Keep only the identifying columns, order by ascending confidence and retain
# the rows at or below the 0.5 threshold named in the heading above.
lc = (
    agg_data[['_unit_id', 'assigned_class:confidence', 'assigned_class']]
    .sort_values('assigned_class:confidence')
    .loc[lambda frame: frame['assigned_class:confidence'] <= 0.5]
)
lc

Least confident sentences (confidence <= 0.5)


Unnamed: 0,_unit_id,assigned_class:confidence,assigned_class
168,1445637954,0.3554,WORSE
67,1445637841,0.3573,NO_COMP
149,1445637935,0.37,BETTER
213,1446784090,0.4967,WORSE


In [104]:
# Rows of the full report whose _unit_id appears in the low-confidence table,
# reduced to the columns needed for the comparison below.
low_confidence_ids = lc['_unit_id']
sentences = full_data.loc[full_data['_unit_id'].isin(low_confidence_ids)]
sentences_p = sentences.loc[
    :, ['_unit_id', 'assigned_class', '_trust', 'new_sentence']
]

In [105]:
# Join the low-confidence rows with the matching full-report rows. Both frames
# carry an 'assigned_class' column; the suffixed copies are renamed so the
# left one reads 'assigned' and the right one 'proposed'.
merged = (
    lc.merge(sentences_p, on=['_unit_id'], how='inner', suffixes=['_l', '_r'])
    .rename(columns={'assigned_class_l': 'assigned', 'assigned_class_r': 'proposed'})
)

shown_columns = [
    'assigned_class:confidence', 'assigned', 'proposed', '_trust', 'new_sentence'
]
# Shade the confidence column with the colour map defined earlier.
(
    merged[shown_columns]
    .style
    .background_gradient(cmap=color_map, subset=['assigned_class:confidence'])
)

Unnamed: 0,assigned_class:confidence,assigned,proposed,_trust,new_sentence
0,0.3554,WORSE,BETTER,0.7273,Google shouldn't have mandated an inferior map app on the iphone:[OBJECT_A] (as opposed to android:[OBJECT_B]).
1,0.3554,WORSE,WORSE,0.8571,Google shouldn't have mandated an inferior map app on the iphone:[OBJECT_A] (as opposed to android:[OBJECT_B]).
2,0.3554,WORSE,NO_COMP,0.8276,Google shouldn't have mandated an inferior map app on the iphone:[OBJECT_A] (as opposed to android:[OBJECT_B]).
3,0.3573,NO_COMP,NO_COMP,0.9091,"Sitting down with another programming language for a while can open your mind up to other ways of solving problems, and I think I became a better python:[OBJECT_A] programmer after spending time in ruby:[OBJECT_B] land."
4,0.3573,NO_COMP,BETTER,0.8571,"Sitting down with another programming language for a while can open your mind up to other ways of solving problems, and I think I became a better python:[OBJECT_A] programmer after spending time in ruby:[OBJECT_B] land."
5,0.3573,NO_COMP,UNCLEAR,0.7778,"Sitting down with another programming language for a while can open your mind up to other ways of solving problems, and I think I became a better python:[OBJECT_A] programmer after spending time in ruby:[OBJECT_B] land."
6,0.37,BETTER,NO_COMP,0.8276,Not to mention that the iphone:[OBJECT_A] and android:[OBJECT_B] phones deliver a far superior user experience overall.
7,0.37,BETTER,BETTER,1.0,Not to mention that the iphone:[OBJECT_A] and android:[OBJECT_B] phones deliver a far superior user experience overall.
8,0.37,BETTER,UNCLEAR,0.875,Not to mention that the iphone:[OBJECT_A] and android:[OBJECT_B] phones deliver a far superior user experience overall.
9,0.4967,WORSE,UNCLEAR,0.8333,"Do yourself a favor, park (or better yet sell) the car:[OBJECT_A], ride a bicycle:[OBJECT_B] or take public transportation."


### Labels

#### Distribution

In [106]:
def make_autopct(values):
    """Build an ``autopct`` callback that shows percentage and absolute count.

    Parameters
    ----------
    values : iterable of numbers
        The sizes the percentages refer to. It is summed exactly once, so a
        one-shot iterable (e.g. a generator) is fine.

    Returns
    -------
    callable
        ``my_autopct(pct) -> str``. Given a percentage ``pct`` it returns a
        string such as ``'25.00%  (1)'``, where the number in parentheses is
        ``pct`` percent of the total, rounded to the nearest integer.
    """
    # Hoisted out of the callback: the total does not change between calls,
    # and re-summing an exhausted iterator would silently yield 0.
    total = sum(values)

    def my_autopct(pct):
        val = int(round(pct * total / 100.0))
        return '{p:.2f}%  ({v:d})'.format(p=pct, v=val)

    return my_autopct


# Pie chart of how often each distinct value of the 'label' column occurs.
# The counts are computed once and reused for wedge sizes, wedge labels and
# the percentage/count annotation.
labels = agg_data['label']
label_counts = labels.value_counts()

fig, ax = plt.subplots(figsize=plt.figaspect(1))
ax.pie(
    label_counts.values,
    labels=label_counts.index.tolist(),
    radius=2,
    autopct=make_autopct(label_counts.values),
)
plt.show()


<matplotlib.figure.Figure at 0x1a1ca12c88>