# Compare predictions

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

In [2]:
OUTPUT_PATH = Path(".").absolute().parent / "output"

In [24]:
# !head -50 ../output/inference_FurnitureInceptionV4_350_20180425_2258/predict.log

In [140]:
prediction_files = [
    OUTPUT_PATH / "inference_FurnitureNASNetALarge_20180418_0635" / "predictions.csv",
    OUTPUT_PATH / "test_nasnetalarge_350_resized_crop" / "20180428_1455" / "predictions.csv",    
    OUTPUT_PATH / "inference_FurnitureInceptionResNet299_20180426_1440" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureInceptionV4_350_20180425_2258" / "predictions.csv",
    OUTPUT_PATH / "inference_FurnitureInceptionV4_350_20180428_0808" / "predictions.csv",
]

In [141]:
names = [f.parent.name.replace("inference_", "") for f in prediction_files]
names

['FurnitureNASNetALarge_20180418_0635',
 '20180428_1455',
 'FurnitureInceptionResNet299_20180426_1440',
 'FurnitureInceptionV4_350_20180425_2258',
 'FurnitureInceptionV4_350_20180428_0808']

In [142]:
dfs = [pd.read_csv(f, index_col='id') for f in prediction_files]

merged_df = pd.concat([df for df in dfs], axis=1)
merged_df.columns = names

In [143]:
merged_df.head(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,20180428_1455,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258,FurnitureInceptionV4_350_20180428_0808
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,12,12,12,12,12
2,71,71,71,71,71
3,91,91,91,91,91
4,54,54,54,54,54
5,126,126,126,126,126
6,76,76,76,76,76
7,94,94,94,94,94
8,8,8,8,8,8
9,127,127,127,127,127
10,117,117,117,117,117


In [144]:
disagreement_mask = merged_df.mean(axis=1).apply(lambda x: int(x) != x)
print(disagreement_mask.sum(), disagreement_mask.shape[0])

1690 12800


In [145]:
merged_df[disagreement_mask].head(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,20180428_1455,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258,FurnitureInceptionV4_350_20180428_0808
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
12,128,19,19,19,19
14,77,77,45,77,45
20,108,108,27,27,49
24,15,63,63,23,15
26,89,89,89,97,97
38,75,61,61,61,104
47,82,127,43,43,43
48,15,15,63,15,63
66,5,5,110,5,112
76,14,30,30,30,14


In [146]:
def get_decision_fn(weights):
    def fn(row):
        votes = np.zeros(128 + 1, dtype=np.int)
        for r, w in zip(row, weights):
            votes[r] += w
        return np.argmax(votes)
    return fn

In [147]:
merged_df.loc[:, 'MajVote'] = merged_df.apply(get_decision_fn(weights=[1.0, 1.0, 1.0, 1.0, 1.0]), axis=1)

In [148]:
merged_df[disagreement_mask].head(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,20180428_1455,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258,FurnitureInceptionV4_350_20180428_0808,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12,128,19,19,19,19,19
14,77,77,45,77,45,77
20,108,108,27,27,49,27
24,15,63,63,23,15,15
26,89,89,89,97,97,89
38,75,61,61,61,104,61
47,82,127,43,43,43,43
48,15,15,63,15,63,15
66,5,5,110,5,112,5
76,14,30,30,30,14,30


In [149]:
merged_df[disagreement_mask].tail(10)

Unnamed: 0_level_0,FurnitureNASNetALarge_20180418_0635,20180428_1455,FurnitureInceptionResNet299_20180426_1440,FurnitureInceptionV4_350_20180425_2258,FurnitureInceptionV4_350_20180428_0808,MajVote
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12729,77,81,77,77,77,77
12739,105,105,73,125,125,105
12746,72,72,72,72,54,72
12747,42,113,13,113,80,113
12756,17,17,22,21,86,17
12766,60,106,106,106,106,106
12776,79,115,37,121,121,121
12781,23,4,4,4,23,4
12795,2,2,88,2,2,2
12797,75,75,75,75,61,75


In [150]:
merged_df['MajVote'].to_csv("maj_votes_2nasnet_2incv4_incv4rc.csv", header=["predicted"])

In [151]:
!head maj_votes_2nasnet_2incv4_incv4rc.csv

id,predicted
1,12
2,71
3,91
4,54
5,126
6,76
7,94
8,8
9,127


## Correlations between probas on validation

In [2]:
from pathlib import Path
import sys
sys.path.insert(0, Path(".").absolute().parent.as_posix())

In [3]:
import matplotlib.pylab as plt
%matplotlib inline
import seaborn as sns


In [4]:
import numpy as np
import pandas as pd

from PIL import Image


OUTPUT_PATH = Path(".").absolute().parent / "output"

In [5]:
# !ls ../output/val_probas_inceptionv4_350_resized_crop/20180428_1633/

In [6]:
prediction_files = [
    OUTPUT_PATH / "val_probas_inceptionresnetv2_350_resized_crop" / "20180428_1622" / "probas.csv",
    OUTPUT_PATH / "val_probas_inceptionv4_350_resized_crop" / "20180428_1633" / "probas.csv",
    OUTPUT_PATH / "val_probas_nasnetalarge_350_resized_crop" / "20180428_1654" / "probas.csv",    
]

In [7]:
dfs = [pd.read_csv(f, index_col='id') for f in prediction_files]

# merged_df = pd.concat([df for df in dfs], axis=1)
# merged_df.columns = names

In [8]:
df_probas = pd.concat(dfs, axis=0)

In [9]:
y_probas = df_probas.groupby('id').mean()
y_probas.head()

Unnamed: 0_level_0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c118,c119,c120,c121,c122,c123,c124,c125,c126,c127
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.46379e-08,8.065056e-07,7.792222e-08,7.491578e-08,2.797733e-08,1.451325e-07,2.913413e-08,6.184602e-07,1.00651e-08,2.935796e-06,...,2.227966e-07,2.282861e-08,1.963591e-08,1.422318e-08,6.270509e-08,5.168149e-08,8.267714e-07,6.070628e-07,1.14039e-07,1.168854e-08
2,9.347801e-06,1.624991e-06,0.0003003126,0.001182464,3.153259e-06,2.066257e-05,4.393156e-06,8.210427e-05,1.200259e-06,2.793317e-06,...,9.80263e-07,0.0003333903,5.266618e-06,2.500858e-05,1.383422e-06,4.214561e-06,1.071219e-05,0.0002327887,2.591772e-06,1.976304e-05
3,4.416102e-08,4.241639e-07,1.351062e-07,4.784809e-08,3.953332e-07,4.317448e-08,2.568201e-06,1.424658e-07,1.684594e-07,2.076342e-07,...,2.14685e-07,1.545074e-05,6.385568e-05,6.052857e-06,2.460202e-07,0.0008062696,3.916902e-07,1.67832e-06,9.834748e-08,2.041568e-06
4,3.394759e-06,2.958466e-06,1.998878e-06,0.001193497,1.526749e-06,2.524314e-06,7.697208e-07,0.0005268663,6.972285e-07,3.750449e-07,...,6.772507e-07,5.826728e-06,2.263593e-06,1.069762e-06,7.759116e-07,3.597562e-06,2.399399e-06,0.956977,1.40773e-06,1.207639e-06
5,1.737058e-07,1.281176e-09,4.902731e-09,6.457448e-06,6.476485e-09,8.372518e-07,3.624372e-08,3.835375e-08,3.213217e-09,8.829736e-08,...,3.043351e-09,2.643031e-07,1.48227e-08,3.292611e-09,7.460706e-09,1.318289e-08,5.479758e-09,8.172043e-09,3.189449e-09,1.807494e-08


In [10]:
from common.dataset import FilesFromCsvDataset, TransformedDataset
from common.meta import get_metafeatures, get_imsize_and_targets


dataset = FilesFromCsvDataset("../output/filtered_val_dataset.csv")
dataset = TransformedDataset(dataset,
                             transforms=lambda x: (x, Image.open(x).size),
                             target_transforms=lambda l: l - 1)

df_imsize_targets = get_imsize_and_targets(dataset)

y_true = df_imsize_targets['target']
y_probas = y_probas.loc[y_true.index, :]

In [11]:
y_pred = np.argmax(y_probas.values, axis=1)

In [12]:
from sklearn.metrics import confusion_matrix, recall_score, precision_score

By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`
is equal to the number of observations known to be in group :math:`i` but
predicted to be in group :math:`j`.

In [14]:
cm = confusion_matrix(y_true, y_pred)
recall_per_class = recall_score(y_true, y_pred, average=None)

In [15]:
idx = np.arange(len(cm))
mcm = cm - cm[idx, idx] * np.eye(len(cm), dtype=np.int)

In [20]:
misclassifed = {}

for i in range(128):
    classes = np.where(mcm[i, :] >= 4)[0]
    values = mcm[i, classes]
    if len(classes) > 0:
        misclassifed[i] = {
            'recall': recall_per_class[i],
            'wrong_classes':[(c, v) for c, v in zip(classes, values)]
        }

In [21]:
misclassifed

{1: {'recall': 0.84, 'wrong_classes': [(87, 4)]},
 3: {'recall': 0.5625, 'wrong_classes': [(2, 7), (28, 5)]},
 14: {'recall': 0.3, 'wrong_classes': [(3, 4), (28, 5), (62, 8), (125, 6)]},
 18: {'recall': 0.66, 'wrong_classes': [(127, 7)]},
 21: {'recall': 0.7872340425531915, 'wrong_classes': [(16, 4)]},
 22: {'recall': 0.7551020408163265, 'wrong_classes': [(62, 6)]},
 26: {'recall': 0.6938775510204082, 'wrong_classes': [(111, 9)]},
 27: {'recall': 0.8979591836734694, 'wrong_classes': [(23, 4)]},
 30: {'recall': 0.7916666666666666, 'wrong_classes': [(69, 6)]},
 34: {'recall': 0.7916666666666666, 'wrong_classes': [(12, 4), (69, 4)]},
 38: {'recall': 0.68, 'wrong_classes': [(86, 11), (108, 5)]},
 48: {'recall': 0.7346938775510204, 'wrong_classes': [(124, 5)]},
 49: {'recall': 0.6530612244897959, 'wrong_classes': [(19, 4), (53, 12)]},
 50: {'recall': 0.75, 'wrong_classes': [(52, 4)]},
 53: {'recall': 0.7755102040816326, 'wrong_classes': [(19, 4)]},
 57: {'recall': 0.8775510204081632, 'wrong

In [25]:
misclassifed_t = {}

for i in range(128):
    classes = np.where(cm[:, i] >= 4)[0]
    values = cm[classes, i]
    if len(classes) > 1:
        misclassifed_t[i] = {
            'true_classes':[(c, v) for c, v in zip(classes, values)]
        }

In [26]:
misclassifed_t

{1: {'true_classes': [(1, 42), (87, 4)]},
 2: {'true_classes': [(2, 45), (3, 7), (57, 4)]},
 3: {'true_classes': [(3, 27), (14, 4)]},
 4: {'true_classes': [(4, 49), (107, 4)]},
 12: {'true_classes': [(12, 44), (34, 4)]},
 14: {'true_classes': [(14, 15), (62, 6)]},
 16: {'true_classes': [(16, 43), (21, 4)]},
 18: {'true_classes': [(18, 33), (127, 5)]},
 19: {'true_classes': [(19, 46), (49, 4), (53, 4), (99, 7)]},
 22: {'true_classes': [(22, 37), (62, 5)]},
 23: {'true_classes': [(23, 47), (27, 4)]},
 25: {'true_classes': [(25, 44), (62, 8)]},
 26: {'true_classes': [(26, 34), (111, 7)]},
 28: {'true_classes': [(3, 5), (14, 5), (28, 41), (62, 9)]},
 31: {'true_classes': [(31, 48), (65, 5)]},
 38: {'true_classes': [(38, 34), (86, 4), (108, 6)]},
 39: {'true_classes': [(39, 49), (65, 7)]},
 41: {'true_classes': [(41, 47), (58, 4)]},
 52: {'true_classes': [(50, 4), (52, 43)]},
 53: {'true_classes': [(49, 12), (53, 38), (87, 5)]},
 56: {'true_classes': [(56, 40), (65, 6)]},
 59: {'true_classe

In [170]:
df_probas.head()

Unnamed: 0_level_0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c118,c119,c120,c121,c122,c123,c124,c125,c126,c127
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6302,1.373452e-11,1.510409e-10,1.260431e-12,4.921817e-12,1.109817e-10,2.23659e-12,5.07353e-11,2.922072e-12,2.468959e-12,5.981444e-12,...,8.672143e-13,2.229347e-13,1.81405e-11,7.032357e-14,1.983395e-10,6.569913e-12,1.698414e-11,5.340068e-11,4.467111e-11,6.92629e-13
3349,6.708128e-07,5.170703e-07,1.846702e-07,4.019679e-07,2.481107e-07,3.374197e-07,2.376433e-06,9.245439e-07,0.004313464,3.201822e-07,...,2.315184e-07,2.930983e-07,1.619783e-05,1.609546e-06,0.0003666026,6.810326e-07,1.135143e-06,4.54206e-07,6.519358e-07,7.396901e-07
484,1.588944e-07,3.46054e-06,1.504418e-05,3.487897e-06,3.223231e-08,1.037788e-06,5.816736e-07,1.317494e-08,5.965195e-07,3.312266e-07,...,0.9997158,2.687145e-08,7.48398e-10,1.66301e-09,1.542122e-07,3.481257e-09,2.401089e-08,2.147751e-09,9.340559e-07,7.470094e-09
2677,5.284062e-11,1.166769e-08,2.62324e-11,6.023646e-10,3.024778e-07,3.700055e-10,2.080192e-10,8.870108e-11,1.285723e-10,1.011352e-10,...,1.989998e-11,6.330309e-10,2.516007e-08,1.031392e-10,9.538543e-10,4.859603e-09,1.651155e-07,9.416713e-10,1.15199e-08,9.996565e-11
1517,0.0001060053,2.394826e-07,4.06639e-06,1.615211e-05,1.085549e-07,6.640157e-05,1.039728e-06,5.748253e-08,1.454809e-06,2.930471e-05,...,1.193192e-05,0.1042851,2.138914e-07,1.049197e-07,1.285039e-07,2.57296e-08,4.993359e-07,7.153245e-08,4.385286e-08,1.651578e-06


In [195]:
class_index = 62
cols = ['c{}'.format(c) for c, _ in misclassifed[class_index]['wrong_classes']] + ['c{}'.format(class_index)]

In [27]:
df_probas.loc[y_true[y_true == class_index].index[:10], cols]

NameError: name 'class_index' is not defined

Same on test data

In [28]:
from pathlib import Path
import sys
sys.path.insert(0, Path(".").absolute().parent.as_posix())

In [30]:
import matplotlib.pylab as plt
%matplotlib inline
import seaborn as sns

In [31]:
import numpy as np
import pandas as pd

from PIL import Image


OUTPUT_PATH = Path(".").absolute().parent / "output"

In [152]:
prediction_files = [
    OUTPUT_PATH / "test_probas_inceptionresnetv2_350_resized_crop" / "20180429_1242" / "probas.csv",
    OUTPUT_PATH / "test_probas_inceptionv4_350_resized_crop" / "20180429_1303" / "probas.csv",
    OUTPUT_PATH / "test_probas_nasnetalarge_350_resized_crop" / "20180429_1406" / "probas.csv",    
]

In [153]:
dfs = [pd.read_csv(f, index_col='id') for f in prediction_files]

# merged_df = pd.concat([df for df in dfs], axis=1)
# merged_df.columns = names

In [154]:
df_probas = pd.concat(dfs, axis=0)

In [155]:
y_probas = df_probas.groupby('id').mean()
y_probas.head()

Unnamed: 0_level_0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c118,c119,c120,c121,c122,c123,c124,c125,c126,c127
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,9.862199e-07,8.224057e-07,1.880568e-06,1e-06,4.618293e-07,9.263216e-07,9.897302e-07,6.271725e-07,1.915197e-07,9.213227e-07,...,2.152936e-07,5.636821e-07,2.699563e-06,6.409772e-07,5.432047e-07,3.062046e-07,7.301491e-07,5.228147e-06,2.516525e-07,1.886951e-07
2,1.720262e-05,2.380006e-06,1.445897e-05,7.8e-05,1.80376e-05,1.726636e-05,2.566374e-05,0.0001237212,3.188547e-06,3.855472e-06,...,3.368749e-06,0.0005478171,6.021129e-05,0.0001385998,1.258187e-05,0.0001043778,0.0001264411,0.0003751204,6.12876e-05,0.0001277217
3,1.770536e-07,2.894921e-09,3.755934e-09,3e-06,6.970809e-09,6.017273e-08,1.694022e-09,1.671877e-08,1.665075e-09,1.644577e-09,...,3.482825e-08,8.414282e-07,1.057803e-08,8.492903e-09,1.114766e-09,8.784082e-09,2.222e-09,2.684315e-07,2.477973e-09,1.248474e-08
4,0.001782248,0.02733547,0.002086777,7.9e-05,8.824658e-05,0.0003419626,3.641507e-05,2.742752e-06,0.003056863,3.505401e-06,...,0.000151742,0.0001697029,1.144454e-05,5.063473e-05,2.645892e-05,1.315374e-05,0.0009224437,1.837482e-05,1.047604e-06,1.024565e-05
5,1.07137e-05,3.960776e-06,7.329427e-06,0.00197,2.539115e-06,3.110096e-06,3.368884e-06,0.0002632378,1.026764e-06,3.634773e-07,...,5.432784e-07,5.232236e-05,1.627422e-05,8.387149e-06,2.052396e-06,1.900287e-05,5.119948e-06,0.8342493,1.508827e-06,6.451978e-05


In [156]:
missing_indices = list(set(merged_df['MajVote'].index) - set(y_probas.index))
missing_values = merged_df.loc[missing_indices, 'MajVote'].values
for idx, v in zip(missing_indices, missing_values):
    y_probas.loc[idx, :] = 0.01
    col = y_probas.columns[v - 1]
    y_probas.loc[idx, col] = 0.99

In [157]:
y_probas = y_probas.sort_index()
y_probas.tail()

Unnamed: 0_level_0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c118,c119,c120,c121,c122,c123,c124,c125,c126,c127
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12796,4.381392e-09,6.314962e-07,7.572957e-08,2.311437e-09,1.356048e-09,1.668792e-08,4.363206e-08,1.671824e-09,1.091921e-07,1.792087e-09,...,2.857898e-09,1.509412e-07,1.006055e-07,2.99411e-08,2.972021e-08,1.198559e-08,7.935118e-08,1.846029e-09,1.235438e-09,7.261427e-10
12797,6.910108e-06,4.275746e-06,3.172889e-05,2.968771e-05,7.82242e-06,7.213247e-06,6.875404e-06,2.104151e-05,4.416036e-06,4.366382e-06,...,3.824326e-06,2.759305e-05,3.145685e-05,1.86367e-05,6.214833e-06,0.005094435,1.052879e-05,2.164282e-05,1.473006e-05,3.066357e-06
12798,2.383735e-06,1.047669e-06,0.2143573,0.0004733417,8.855819e-07,4.352939e-06,1.309082e-06,1.26239e-06,1.347197e-06,6.095003e-07,...,1.139767e-06,1.199401e-05,2.032089e-06,1.803226e-06,7.73093e-07,4.508365e-07,1.895334e-06,4.73779e-06,7.498915e-07,5.017002e-07
12799,0.9096714,7.947413e-06,0.0009341166,0.002122171,3.859915e-06,0.00384479,2.881859e-05,9.615906e-06,2.541031e-05,4.21484e-05,...,0.0001192395,0.002515659,9.368937e-05,0.0001283233,7.900182e-06,5.917276e-06,5.282538e-06,2.622265e-05,7.296549e-06,9.080807e-06
12800,6.250671e-08,3.572248e-08,1.1696e-07,2.540043e-07,5.049646e-08,8.659654e-08,1.20249e-07,2.696892e-08,1.159987e-07,8.211985e-09,...,2.345162e-08,1.51564e-07,7.044685e-08,5.965807e-08,3.267539e-07,2.989624e-07,2.173108e-07,1.245238e-08,3.891613e-08,8.717371e-07


In [158]:
y_pred = np.argmax(y_probas.values, axis=1) + 1

In [159]:
y_maj_votes = merged_df['MajVote'].values

In [160]:
(y_maj_votes == y_pred).sum(), (y_maj_votes != y_pred).sum(), len(y_maj_votes)

(12421, 379, 12800)

In [161]:
y_pred[y_maj_votes != y_pred]

array([ 45, 127,  63, 110, 124,  45,  97,   1,  97,  24, 115,  49,  45,
        54, 126,  50,  57,  42,  51,  87, 128,  71,  98,  66,  35, 100,
        81,  63,  45, 112,  87,  45,  54,  22, 124, 126,  62, 116,  44,
        81,  22, 125,   1,  18,  18, 114,  96,  14, 127,   1,  81,  87,
        82, 112, 104,  85, 109, 103, 112,  64,  66,   4,   3,  84, 127,
        46, 118,  23,  83,  93, 126,  37, 125, 103,   3, 112, 110, 124,
       110,  84,  47,  69, 121,  51, 107, 113,  27,  88,  29,  54,  82,
        83,  31,  18,  91, 117, 115,  16,  86,  19,  97,   8,  71,  74,
       104,   5, 110,   4,  59,  70,  21,  26, 105, 117,  40,  61,  78,
        69,   7, 112,  98, 110,  15, 112, 115, 108, 103, 108,  75,  61,
        20,  11,  86, 124,  96,  68,  54,  87, 116, 109, 113,  23,  93,
        14,  12,  22,  29, 121,  13,   1,  86,  37, 104,  57,  89,  29,
       100, 102,  31,  75,  88,  53, 128,  25,  96,  21,  78,  65, 115,
        98, 110,   7,  31,  23, 126, 105,  35, 110,  73, 126, 12

In [136]:
df = pd.DataFrame({"predicted": y_pred}, index=y_probas.index)
df.to_csv("mean_proba_nasnet_incv4_incv4rc.csv")

In [137]:
!head -10 mean_proba_nasnet_incv4_incv4rc.csv

id,predicted
1,12
2,71
3,91
4,54
5,126
6,76
7,94
8,8
9,127
