## Data Collection

This notebook was used to analyze the errors made by the best features

In [1]:
import pandas as pd
from collections import defaultdict
from pprint import pprint
pd.options.display.max_rows = 1500

In [2]:
conf = pd.read_csv('../../code/data/data_if.csv')[['id','most_frequent_percentage']]
conf.set_index('id')
df_cols = ['id','caption','sentence','object_a','object_b','predicted','gold', 'most_frequent_percentage']
paths = pd.read_csv('../../code/data/path_embeddings/middle_paths_unrestricted_16.csv')
paths['sentence'] = paths.apply(lambda row: row['sentence'][2:-2],axis=1)
data = pd.read_csv('binary_True.csv')
pd.options.display.max_colwidth = 100000

In [3]:
shared_3 = pd.read_csv('3_shared.csv')
se_3 = pd.read_csv('3_se.csv')
lex_3 = pd.read_csv('3_lex.csv')

In [4]:
paths[paths.paths.str.len() < 20].paths.value_counts()

[['NOPATH']]    390
Name: paths, dtype: int64

In [5]:
# all errors made by sentence embeddings
se = data[data.caption == "InferSent"]
se = se.join(conf,rsuffix="_r")[df_cols]
se['repeated'] = se.apply(lambda x: True if x.sentence in se_3.sentence.values.tolist() else False,axis=1)

In [6]:
# all errors for the lexnet feature
lex = data[data.caption == 'middle_paths_unrestricted_16']
#lex.set_index('id')
lex = lex.join(conf,rsuffix="_r")[df_cols]
lex['repeated'] = lex.apply(lambda x: True if x.sentence in lex_3.sentence.values.tolist() else False,axis=1)

In [7]:
# all errors exclusivly made by se
se_unique = se[~se.sentence.isin(lex.sentence.values.tolist())].copy()
se_unique['origin'] = 'se (unique)'

In [8]:
p = paths[['sentence','paths','id']].copy()
p = p.set_index('id')

# errors exclusivly made by lexnet
lex_unique = lex[~lex.sentence.isin(se.sentence.values.tolist())].copy()
lex_unique['origin'] = 'lex (unique)'
lex_unique = lex_unique.set_index('id')
lex_unique = lex_unique.join(p,rsuffix='r')
lex_unique = lex_unique.drop_duplicates(subset=['sentence'])

In [20]:
# all errors made by both classifiers
se_multi = se[~se.sentence.isin(se_unique.sentence.values.tolist())].copy()
lex_multi = lex[~lex.sentence.isin(lex_unique.sentence.values.tolist())].copy()
se_multi['origin'] = 'both'
lex_multi['origin'] = 'both'
assert sorted(se_multi.id.values.tolist()) == sorted(lex_multi.id.values.tolist())
shared = se_multi.copy()
shared['caption'] = 'shared'
shared['repeated'] = shared.apply(lambda x: True if x.sentence in shared_3.sentence.values.tolist() else False,axis=1)
shared['pred_if'] = shared.apply(lambda x: se_multi[se_multi.sentence == x['sentence']].predicted.values.tolist()[0],axis=1)
shared['pred_lex'] = shared.apply(lambda x: lex_multi[lex_multi.sentence == x['sentence']].predicted.values.tolist()[0],axis=1)
shared = shared.set_index('id')
shared = shared.join(conf,rsuffix="_r")[['sentence','object_a','object_b','pred_if', 'pred_lex', 'gold', 'most_frequent_percentage','repeated']]

In [10]:
print('Shared {}'.format(len(shared)))
print('SE Unique {} | Lex Unique {}'.format(len(se_unique),len(lex_unique)))
print('SE {} | Lex {}'.format(len(se),len(lex)))
assert len(shared) + len(se_unique) == len(se)
assert len(shared) + len(lex_unique) == len(lex)

Shared 394
SE Unique 254 | Lex Unique 496
SE 648 | Lex 890


In [11]:
def class_confusion(df):
    d2 = defaultdict(int)
    d = defaultdict(int)
    for i, row in df.iterrows():
        k = 'Pred: {} - Gold: {}'.format(row['predicted'],row['gold'])
        d[k] += 1
    print('\n')
    pprint(sorted(d.items(),key=lambda x: x[1],reverse=True))

In [12]:
def analyze_errors(df,conf=True):
    df['s_len'] = df.apply(lambda x: len(x['sentence']),axis=1)
    print('### Confidence')
    print(df.most_frequent_percentage.value_counts())
    print('### Repeated Errors')
    print(df.repeated.value_counts())
    if conf:
        print('\n### Confusion')
        class_confusion(df)
    print('\n### Average sentence length')
   
    print(df.s_len.describe())
    print(df.groupby('most_frequent_percentage').mean().sort_values('s_len',ascending=False))
    

## Sentence Embedding Errors

In [13]:
analyze_errors(se_unique)

### Confidence
1.000000    178
0.800000     47
0.600000     18
0.400000      5
0.833333      4
0.666667      1
0.857143      1
Name: most_frequent_percentage, dtype: int64
### Repeated Errors
False    143
True     111
Name: repeated, dtype: int64

### Confusion


[('Pred: ARG - Gold: NONE', 164), ('Pred: NONE - Gold: ARG', 90)]

### Average sentence length
count    254.000000
mean     132.377953
std       70.632258
min       16.000000
25%       79.250000
50%      121.500000
75%      170.000000
max      498.000000
Name: s_len, dtype: float64
                          repeated       s_len
most_frequent_percentage                      
0.833333                  0.250000  166.000000
0.400000                  0.400000  142.800000
0.600000                  0.500000  135.444444
1.000000                  0.426966  132.539326
0.800000                  0.446809  129.063830
0.666667                  1.000000  100.000000
0.857143                  1.000000   50.000000


In [14]:
se_unique.sort_values('most_frequent_percentage',ascending=False).drop(columns=['caption','origin','id'])

Unnamed: 0,sentence,object_a,object_b,predicted,gold,most_frequent_percentage,repeated,s_len
2920,"The micro-USB adapter plugs into your smartphone, enabling faster-than-Bluetooth data speeds over a distance of a few centimeters.",USB,Bluetooth,NONE,ARG,1.0,True,130
3184,"It's better than B&N (no file table access at all) or Amazon (hard reset every time to update filetable), but still not as simple as Sony.",Amazon,Sony,ARG,NONE,1.0,True,138
3276,Motorola was king before Nokia came in.,Motorola,Nokia,NONE,ARG,1.0,False,39
3277,"I figured it was JavaScript, but if it can be done in PHP, that'd be better.",JavaScript,PHP,ARG,NONE,1.0,True,76
3282,2. (Since any Nike move would be years away) Make Adidas work better for Louisville.,Nike,Adidas,ARG,NONE,1.0,False,84
3284,"As far as my health care goes, I'd be very happy with a BMW 3-series, though I'll take a good, solid Honda.",BMW,Honda,ARG,NONE,1.0,False,107
3293,The only thing better than a cold glass of beer is tea with Miss McGill.,beer,tea,NONE,ARG,1.0,False,72
3294,"Granted both players have their own endorsement deals with the Nike, but perhaps they should have known better than to show an Adidas event in Swoosh-branded attire.",Nike,Adidas,ARG,NONE,1.0,True,165
3298,"No, I've had some very bad experiences with a bad language :-) And having started getting into Groovy, I might say that time spent on something like, say, closures, would be time better spent in the Java world then parametric polymorphism.",Groovy,Java,NONE,ARG,1.0,False,239
3300,How much cooler would it be to have Major League baseball than NBA basketball in Brooklyn?,baseball,basketball,ARG,NONE,1.0,True,90


## LexNet Errors

In [15]:
analyze_errors(lex_unique)

### Confidence
1.000000    349
0.800000     74
0.600000     56
0.400000     10
0.833333      3
0.666667      2
0.857143      1
0.500000      1
Name: most_frequent_percentage, dtype: int64
### Repeated Errors
True     251
False    245
Name: repeated, dtype: int64

### Confusion


[('Pred: NONE - Gold: ARG', 298), ('Pred: ARG - Gold: NONE', 198)]

### Average sentence length
count    496.000000
mean     132.008065
std       79.010471
min       17.000000
25%       83.000000
50%      113.000000
75%      161.000000
max      520.000000
Name: s_len, dtype: float64
                          repeated       s_len
most_frequent_percentage                      
0.833333                  0.333333  287.000000
0.400000                  0.700000  141.500000
0.600000                  0.553571  134.375000
1.000000                  0.481375  131.644699
0.800000                  0.567568  126.094595
0.500000                  0.000000  109.000000
0.666667                  0.500000   97.500000
0.857143     

In [16]:
temp = lex_unique.sort_values('most_frequent_percentage',ascending=False).drop(columns=['caption','origin','sentencer','s_len'])
np = temp[temp.paths.str.startswith("[['NOP")]
print('Sentences without path: {}'.format(len(np)))

Sentences without path: 51


In [30]:
temp[~temp.repeated]


Unnamed: 0_level_0,sentence,object_a,object_b,predicted,gold,most_frequent_percentage,repeated,paths
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
L1822257906,"North carolina however does look solid for Romney, and missouri is still trending for him although the Senate race there is now a toss-up.",carolina,missouri,ARG,NONE,1.0,False,[['X/PROPN/nsubj/> look/VERB/ROOT/^ Y/PROPN/conj/<']]
M2000603877,Pity Apple didn't kill the IBM PC much earlier and we'd have the vastly superior Motorola technology.,Apple,IBM,ARG,NONE,1.0,False,"[['X/PROPN/nsubj/> kill/VERB/ROOT/^ Y/PROPN/dobj/<'], ['X/PROPN/nsubj/> kill/VERB/ROOT/^ the/DET/det/V Y/PROPN/dobj/<']]"
M1985992150,"Perhaps it is easier to be ""from"" Alaska or california or Arizona that it is to be ""from"" Maine or South carolina.",california,carolina,ARG,NONE,1.0,False,"[['X/PROPN/dep/> be/VERB/ROOT/^ be/VERB/xcomp/< from/ADP/prep/<', 'maine/PROPN/pobj/< Y/PROPN/conj/<'], ['X/PROPN/dep/> or/CONJ/cc/V be/VERB/ROOT/^ be/VERB/xcomp/<', 'from/ADP/prep/< maine/PROPN/pobj/< Y/PROPN/conj/<'], ['X/PROPN/dep/> arizona/PROPN/conj/V be/VERB/ROOT/^ be/VERB/xcomp/<', 'from/ADP/prep/< maine/PROPN/pobj/< Y/PROPN/conj/<'], ['X/PROPN/dep/> be/VERB/ROOT/^ be/VERB/xcomp/< from/ADP/prep/<', 'maine/PROPN/pobj/< south/PROPN/compound/V Y/PROPN/conj/<']]"
M1925837999,O: pretty decent fruit forward beer that just wasn't my cup of tea.,beer,tea,ARG,NONE,1.0,False,"[['X/PROPN/ROOT/^ Y/PROPN/pobj/<'], ['X/PROPN/ROOT/^ be/VERB/relcl/V Y/PROPN/pobj/<']]"
L1905912546,One was for pistol barrels and one with a much slower twist was for rifle barrels.,pistol,rifle,NONE,ARG,1.0,False,[['X/PROPN/compound/> be/VERB/ROOT/^ Y/PROPN/pobj/<']]
L1843135135,Nowdays though it seems that SE is makeing better quality built phones followed closely by Moto and Siemens (love the SL55 BTW - just i need BT) and last and IMO least - Nokia.,Siemens,Nokia,NONE,ARG,1.0,False,"[['X/PROPN/nsubj/> need/VERB/ROOT/^ last/ADJ/conj/< Y/PROPN/conj/<'], ['X/PROPN/nsubj/> love/VERB/acl/V need/VERB/ROOT/^ last/ADJ/conj/<', 'Y/PROPN/conj/<'], ['X/PROPN/nsubj/> need/VERB/ROOT/^ last/ADJ/conj/< imo/ADV/advmod/V', 'Y/PROPN/conj/<'], ['X/PROPN/nsubj/> need/VERB/ROOT/^ last/ADJ/conj/< least/ADV/amod/V', 'Y/PROPN/conj/<']]"
L1823778672,No soda or anything with caffeine (including chocolate) because caffeine irritates the urinary tract and will make it worse.,soda,chocolate,ARG,NONE,1.0,False,"[['X/PROPN/ROOT/^ Y/PROPN/pobj/<'], ['X/PROPN/ROOT/^ or/CONJ/cc/V Y/PROPN/pobj/<'], ['X/PROPN/ROOT/^ anything/NOUN/conj/V Y/PROPN/pobj/<']]"
L1813324493,"Nokia is just one of many Windows 8 phones, and not even the best because Samsung is far better!",Nokia,Samsung,NONE,ARG,1.0,False,[['X/PROPN/nsubj/> be/VERB/ROOT/^ because/ADP/prep/< Y/PROPN/pobj/<']]
L1805195115,"Nikon gave to Sigma a first ""wake-up call"" with the cheaper and much better 35mm 1.8DX.",Nikon,Sigma,NONE,ARG,1.0,False,[['X/PROPN/nsubj/> give/VERB/ROOT/^ to/ADP/prep/< Y/PROPN/pobj/<']]
L1800897966,"Next, the cement cannot penetrate the wood.",cement,wood,ARG,NONE,1.0,False,"[['X/PROPN/nsubj/> penetrate/VERB/ROOT/^ Y/PROPN/dobj/<'], ['X/PROPN/nsubj/> penetrate/VERB/ROOT/^ the/DET/det/V Y/PROPN/dobj/<']]"


## Shared

In [22]:
analyze_errors(shared,conf=False)

### Confidence
1.000000    280
0.800000     54
0.600000     45
0.400000     10
0.833333      3
0.500000      2
Name: most_frequent_percentage, dtype: int64
### Repeated Errors
True     309
False     85
Name: repeated, dtype: int64

### Average sentence length
count    394.000000
mean     121.121827
std       67.194983
min       27.000000
25%       77.000000
50%      104.000000
75%      147.000000
max      510.000000
Name: s_len, dtype: float64
                          repeated       s_len
most_frequent_percentage                      
0.833333                  1.000000  142.333333
0.800000                  0.777778  133.166667
0.500000                  0.500000  131.000000
0.600000                  0.822222  124.466667
1.000000                  0.778571  118.942857
0.400000                  0.800000   93.700000


In [28]:
shared[~shared.repeated].sample(20)

Unnamed: 0_level_0,sentence,object_a,object_b,pred_if,pred_lex,gold,most_frequent_percentage,repeated,s_len
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
J1441471629,"Its Azure PaaS/IaaS platform hasn't overtaken Amazon yet in market share, but Microsoft has enjoyed nine straight quarters of growth at 10 percent or better .",Amazon,Microsoft,NONE,NONE,ARG,0.5,False,158
E673909808,"Even without that killer app, people who do upgrade from older versions, such as Windows 98, are likely to find Windows XP to be ""a better version of Windows than a lot of people have realized,"" Cherry said.",Windows 98,Windows XP,NONE,NONE,ARG,1.0,False,207
J1549219079,"Jesse James returned to missouri, while Frank went to safer territory in virginia.",missouri,virginia,ARG,ARG,NONE,1.0,False,82
O2302907732,"Still Perl is faster, but Java is not worse than other languages like Python.",Perl,Python,ARG,ARG,NONE,0.4,False,77
G1024603203,"IBM on Wednesday unveiled new POWER8 processor-based servers that the computing giant said ""are capable of analyzing data 50 times faster than the latest x86-based systems"" built on chips from companies like Intel and Advanced Micro Devices.",IBM,Intel,NONE,NONE,ARG,1.0,False,241
B297763396,An example would be Ford makes better pick up trucks and Toyota makes better sedans.,Ford,Toyota,ARG,ARG,NONE,1.0,False,84
Q2627775173,"The psp sells very well in Japan, in america sometimes the ps2 sells better.",psp,ps2,NONE,NONE,ARG,1.0,False,76
I1312277030,In the same ballpark are Haskell (and maybe in the future Python) iff their compilers generate better structures because the task is better formally defined.,Haskell,Python,ARG,ARG,NONE,1.0,False,157
L1777173235,"MySQL might be faster for simple operations, but PostgreSQL completely demolishes it in complex queries.",MySQL,PostgreSQL,ARG,ARG,NONE,1.0,False,104
T3174171655,Why do most wii games look like gamecube games or worse?,wii,gamecube,ARG,ARG,NONE,0.6,False,56


## All

In [24]:
a = pd.concat([shared,lex_unique,se_unique])
analyze_errors(a)

### Confidence
1.000000    807
0.800000    175
0.600000    119
0.400000     25
0.833333     10
0.666667      3
0.500000      3
0.857143      2
Name: most_frequent_percentage, dtype: int64
### Repeated Errors
True     671
False    473
Name: repeated, dtype: int64

### Confusion


[('Pred: NONE - Gold: ARG', 388),
 ('Pred: ARG - Gold: NONE', 362),
 ('Pred: nan - Gold: ARG', 261),
 ('Pred: nan - Gold: NONE', 133)]

### Average sentence length
count    1144.000000
mean      128.340909
std        73.400786
min        16.000000
25%        80.000000
50%       111.500000
75%       158.000000
max       520.000000
Name: s_len, dtype: float64
                          repeated       s_len
most_frequent_percentage                      
0.833333                  0.500000  195.200000
0.600000                  0.647059  130.789916
0.800000                  0.600000  129.074286
1.000000                  0.572491  127.434944
0.500000                  0.333333  123.666667
0.400000                  0.680

## Misc

In [25]:
different = []
for i, row in shared.iterrows():
    se_s = se_multi[se_multi.sentence == row['sentence']].predicted.values.tolist()[0]
    lex_s = lex_multi[lex_multi.sentence == row['sentence']].predicted.values.tolist()[0]
    if se_s != lex_s:
        different.append(row['sentence'])
print('Different mistakes on same sentence: {}'.format(len(different)))


Different mistakes on same sentence: 0


In [26]:
pd.concat([shared,lex_unique,se_unique]).to_csv('2_classes_errors.csv')