# Compare predictions

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
# Directory named "output" located beside the current working directory.
OUTPUT_PATH = Path.cwd().parent / "output"

In [12]:
# Paths to the predictions.csv of every inference run taking part in the
# comparison, built as OUTPUT_PATH / <run directory> / "predictions.csv".
# Runs currently left out:
#   inference_FurnitureVGG16BN_20180412_0719
#   inference_FurnitureSqueezeNet350_20180415_1430
prediction_files = [
    OUTPUT_PATH / run_dir / "predictions.csv"
    for run_dir in (
        "inference_FurnitureNASNetALarge_20180418_0635",
        "inference_FurnitureInceptionV4_350_20180419_0623",
        "inference_FurnitureInceptionResNet299_20180423_2227",
        "inference_FurnitureDenseNet161_350_20180424_0700",
    )
]

In [13]:
# Label for each run: the name of the directory holding the CSV, with the
# "inference_" marker removed.
names = [
    csv_path.parent.name.replace("inference_", "")
    for csv_path in prediction_files
]
names

['FurnitureVGG16BN_20180412_0719',
 'FurnitureNASNetALarge_20180418_0635',
 'FurnitureInceptionV4_350_20180419_0623',
 'FurnitureInceptionResNet299_20180423_2227',
 'FurnitureDenseNet161_350_20180424_0700']

In [14]:
# Load one frame per prediction file, using the "id" column as the index.
dfs = [
    pd.read_csv(csv_path, index_col="id")
    for csv_path in prediction_files
]

In [15]:
# Put the per-run prediction frames side by side, aligned on their shared
# "id" index. The former `names=['a', 'b', 'c', 'd']` argument was dropped:
# pd.concat only uses `names` to label the levels of a hierarchical index
# built from `keys`, so without `keys` it had no effect (and it hard-coded
# a count of four).
merged_df = pd.concat(dfs, axis=1)
# Label every column with its run name; this raises if the total number of
# columns differs from len(names).
merged_df.columns = names

In [16]:
# Preview the first ten rows of the side-by-side predictions.
merged_df.head(10)

Unnamed: 0_level_0,FurnitureVGG16BN_20180412_0719,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionV4_350_20180419_0623,FurnitureInceptionResNet299_20180423_2227,FurnitureDenseNet161_350_20180424_0700
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,12,12,12,12,12
2,71,71,71,71,71
3,91,91,91,91,91
4,54,54,54,54,54
5,42,126,126,126,104
6,76,76,76,76,76
7,94,94,94,94,94
8,42,8,8,8,8
9,127,127,127,127,127
10,117,117,117,117,117


In [17]:
# A row is a disagreement when any column differs from the first column.
# (Testing whether the row mean is fractional is not equivalent: a row such
# as 10, 12, 10, 12 has the integer mean 11 and would be counted as
# agreement even though the predictions differ.)
disagreement_mask = merged_df.ne(merged_df.iloc[:, 0], axis=0).any(axis=1)
# Number of rows with a disagreement, followed by the total number of rows.
print(disagreement_mask.sum(), disagreement_mask.shape[0])

2640 12800


In [18]:
# Show only the rows selected by the boolean disagreement mask.
merged_df.loc[disagreement_mask]

Unnamed: 0_level_0,FurnitureVGG16BN_20180412_0719,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionV4_350_20180419_0623,FurnitureInceptionResNet299_20180423_2227,FurnitureDenseNet161_350_20180424_0700
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5,42,126,126,126,104
8,42,8,8,8,8
12,19,128,19,19,19
14,45,77,45,77,45
16,65,111,111,111,111
20,27,108,125,27,27
24,63,15,15,63,15
26,89,89,97,97,89
31,81,77,77,77,77
38,61,75,61,61,61


In [9]:
def get_decision_fn(weights, num_classes=128):
    """Build a function that picks a label from a row by weighted voting.

    Parameters
    ----------
    weights : sequence of float
        Vote weight for each position of the row handed to the returned
        function; weights and row values are paired up in order.
    num_classes : int, optional
        Largest label value that can occur. Labels index the tally
        directly, so the tally has ``num_classes + 1`` slots
        (valid labels are ``0..num_classes``). Defaults to 128.

    Returns
    -------
    callable
        ``fn(row)`` returning the label with the largest accumulated
        weight. On a tie the smallest label wins, because ``np.argmax``
        returns the first maximum.
    """
    def fn(row):
        # Float tally: an integer array would truncate fractional weights
        # (e.g. 2.5 stored as 2) and could change the winner on ties.
        # The built-in `float` also avoids the `np.int` alias, which was
        # removed from NumPy.
        votes = np.zeros(num_classes + 1, dtype=float)
        # zip stops at the shorter input, so surplus row values (or surplus
        # weights) are ignored.
        for r, w in zip(row, weights):
            votes[r] += w
        return np.argmax(votes)
    return fn

In [66]:
# One vote weight per model column, in the same order as `names`.
vote_weights = [1.0, 3.0, 1.0, 2.5]
# The decision function pairs weights with row values via zip, which would
# silently ignore unmatched models or weights; fail loudly instead.
if len(vote_weights) != len(names):
    raise ValueError(
        "expected one weight per model: got %d weights for %d models"
        % (len(vote_weights), len(names))
    )
# Vote over the model columns only, so re-running this cell never feeds an
# existing 'MajVote' column back into the vote.
merged_df['MajVote'] = merged_df[names].apply(get_decision_fn(weights=vote_weights), axis=1)

In [67]:
# Preview the first ten rows, now including the 'MajVote' column.
merged_df.head(10)

Unnamed: 0_level_0,FurnitureVGG16BN_20180412_0719,FurnitureNASNetALarge_20180418_0635,FurnitureSqueezeNet350_20180415_1430,FurnitureInceptionV4_350_20180419_0623,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,12,12,12,12,12
2,71,71,71,71,71
3,91,91,91,91,91
4,54,54,54,54,54
5,42,126,42,126,126
6,76,76,76,76,76
7,94,94,96,94,94
8,42,8,42,8,8
9,127,127,127,127,127
10,117,117,117,117,117


In [68]:
# Preview the last ten rows, including the 'MajVote' column.
merged_df.tail(10)

Unnamed: 0_level_0,FurnitureVGG16BN_20180412_0719,FurnitureNASNetALarge_20180418_0635,FurnitureSqueezeNet350_20180415_1430,FurnitureInceptionV4_350_20180419_0623,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
12791,112,27,112,112,112
12792,82,82,82,82,82
12793,50,50,50,50,50
12794,111,111,111,111,111
12795,88,2,88,2,2
12796,77,77,77,77,77
12797,75,75,75,75,75
12798,58,3,58,58,58
12799,122,1,1,1,1
12800,30,30,30,30,30


In [69]:
# Write the 'MajVote' column to a CSV in the current working directory, with
# the value column labelled "predicted".
# NOTE(review): the file name mentions vgg and sqnet, but those runs are
# commented out in `prediction_files` — confirm the name is still intended.
merged_df['MajVote'].to_csv("maj_votes_vgg_nasnet_sqnet_incv4.csv", header=["predicted"])

In [71]:
# Shell escape: print the first lines of the written CSV as a sanity check.
!head maj_votes_vgg_nasnet_sqnet_incv4.csv

id,predicted
1,12
2,71
3,91
4,54
5,126
6,76
7,94
8,8
9,127
