# Compare predictions

In [25]:
from pathlib import Path
import numpy as np
import pandas as pd

In [26]:
OUTPUT_PATH = Path(".").absolute().parent / "output"

In [24]:
# !head -50 ../output/inference_FurnitureInceptionV4_350_20180425_2258/predict.log

In [27]:
prediction_files = [
#     OUTPUT_PATH / "inference_FurnitureVGG16BN_20180412_0719" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureNASNetALarge_20180418_0635" / "predictions.csv",
#     OUTPUT_PATH / "inference_FurnitureSqueezeNet350_20180415_1430" / "predictions.csv",
#     OUTPUT_PATH / "inference_FurnitureInceptionV4_350_20180419_0623" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureInceptionResNet299_20180426_1440" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureInceptionV4_350_20180425_2258" / "predictions.csv",    
]

In [28]:
names = [f.parent.name.replace("inference_", "") for f in prediction_files]
names

['FurnitureNASNetALarge_20180418_0635',
 'FurnitureInceptionResNet299_20180426_1440',
 'FurnitureInceptionV4_350_20180425_2258']

In [30]:
dfs = [pd.read_csv(f, index_col='id') for f in prediction_files]

merged_df = pd.concat([df for df in dfs], axis=1, names=['a', 'b', 'c', 'd'])
merged_df.columns = names

In [31]:
merged_df.head(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,12,12,12
2,71,71,71
3,91,91,91
4,54,54,54
5,126,126,126
6,76,76,76
7,94,94,94
8,8,8,8
9,127,127,127
10,117,117,117


In [32]:
disagreement_mask = merged_df.mean(axis=1).apply(lambda x: int(x) != x)
print(disagreement_mask.sum(), disagreement_mask.shape[0])

1236 12800


In [33]:
merged_df[disagreement_mask]

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12,128,19,19
14,77,45,77
24,15,63,23
26,89,89,97
38,75,61,61
43,3,58,58
61,27,27,112
76,14,30,30
77,89,91,89
86,124,124,65


In [34]:
def get_decision_fn(weights):
    def fn(row):
        votes = np.zeros(128 + 1, dtype=np.int)
        for r, w in zip(row, weights):
            votes[r] += w
        return np.argmax(votes)
    return fn

In [35]:
merged_df.loc[:, 'MajVote'] = merged_df.apply(get_decision_fn(weights=[1.0, 1.0, 1.0]), axis=1)

In [36]:
merged_df[disagreement_mask].head(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12,128,19,19,19
14,77,45,77,77
24,15,63,23,15
26,89,89,97,89
38,75,61,61,61
43,3,58,58,58
61,27,27,112,27
76,14,30,30,30
77,89,91,89,89
86,124,124,65,124


In [37]:
merged_df[disagreement_mask].tail(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12716,84,97,84,84
12722,66,32,32,32
12723,30,30,37,30
12755,20,100,100,100
12766,60,106,106,106
12768,12,12,37,12
12781,23,4,4,4
12791,27,27,112,27
12795,2,88,2,2
12798,3,58,58,58


In [38]:
merged_df['MajVote'].to_csv("maj_votes_nasnet_incv4_incv4rc.csv", header=["predicted"])

In [40]:
!head maj_votes_nasnet_incv4_incv4rc.csv

id,predicted
1,12
2,71
3,91
4,54
5,126
6,76
7,94
8,8
9,127
