# Compare predictions

In [4]:
from pathlib import Path
import numpy as np
import pandas as pd

In [5]:
OUTPUT_PATH = Path(".").absolute().parent / "output"

In [6]:
!ls ../output/inference_FurnitureInceptionV4_350_20180425_2258/

predictions.csv  predict.log  tensorboard


In [7]:
prediction_files = [
#     OUTPUT_PATH / "inference_FurnitureVGG16BN_20180412_0719" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureNASNetALarge_20180418_0635" / "predictions.csv",
#     OUTPUT_PATH / "inference_FurnitureSqueezeNet350_20180415_1430" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureInceptionV4_350_20180419_0623" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureInceptionResNet299_20180423_2227" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureInceptionV4_350_20180425_2258" / "predictions.csv",    
]

In [8]:
names = [f.parent.name.replace("inference_", "") for f in prediction_files]
names

['FurnitureNASNetALarge_20180418_0635',
 'FurnitureInceptionV4_350_20180419_0623',
 'FurnitureInceptionResNet299_20180423_2227',
 'FurnitureInceptionV4_350_20180425_2258']

In [9]:
dfs = [pd.read_csv(f, index_col='id') for f in prediction_files]

In [10]:
merged_df = pd.concat([df for df in dfs], axis=1, names=['a', 'b', 'c', 'd'])
merged_df.columns = names

In [11]:
merged_df.head(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionV4_350_20180419_0623,FurnitureInceptionResNet299_20180423_2227,FurnitureInceptionV4_350_20180425_2258
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,12,12,12,12
2,71,71,71,71
3,91,91,91,91
4,54,54,54,54
5,126,126,126,126
6,76,76,76,76
7,94,94,94,94
8,8,8,8,8
9,127,127,127,127
10,117,117,117,117


In [12]:
disagreement_mask = merged_df.mean(axis=1).apply(lambda x: int(x) != x)
print(disagreement_mask.sum(), disagreement_mask.shape[0])

1564 12800


In [13]:
merged_df[disagreement_mask]

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionV4_350_20180419_0623,FurnitureInceptionResNet299_20180423_2227,FurnitureInceptionV4_350_20180425_2258
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
12,128,19,19,19
20,108,125,27,27
38,75,61,61,61
43,3,58,58,58
47,82,43,43,43
48,15,15,26,15
50,90,90,44,90
56,115,115,121,115
61,27,27,27,112
66,5,5,112,5


In [14]:
def get_decision_fn(weights):
    def fn(row):
        votes = np.zeros(128 + 1, dtype=np.int)
        for r, w in zip(row, weights):
            votes[r] += w
        return np.argmax(votes)
    return fn

In [15]:
merged_df.loc[:, 'MajVote'] = merged_df.apply(get_decision_fn(weights=[1.0, 1.0, 1.0, 1.0]), axis=1)

In [17]:
merged_df[disagreement_mask].head(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionV4_350_20180419_0623,FurnitureInceptionResNet299_20180423_2227,FurnitureInceptionV4_350_20180425_2258,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
12,128,19,19,19,19
20,108,125,27,27,27
38,75,61,61,61,61
43,3,58,58,58,58
47,82,43,43,43,43
48,15,15,26,15,15
50,90,90,44,90,90
56,115,115,121,115,115
61,27,27,27,112,27
66,5,5,112,5,5


In [19]:
merged_df[disagreement_mask].tail(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,FurnitureInceptionV4_350_20180419_0623,FurnitureInceptionResNet299_20180423_2227,FurnitureInceptionV4_350_20180425_2258,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
12763,49,49,100,49,49
12766,60,106,127,106,106
12768,12,37,12,37,12
12776,79,121,121,121,121
12780,102,57,102,57,57
12781,23,4,15,4,4
12791,27,112,27,112,27
12795,2,2,88,2,2
12797,75,75,61,75,75
12798,3,58,58,58,58


In [20]:
merged_df['MajVote'].to_csv("maj_votes_nasnet_incv4_incres_incv4rc.csv", header=["predicted"])

In [22]:
!head maj_votes_nasnet_incv4_incres_incv4rc.csv

id,predicted
1,12
2,71
3,91
4,54
5,126
6,76
7,94
8,8
9,127
