# Optimizing how the prediction results are served

In this notebook I change how predictions are served by providing the results that add up to 95% certainty, 
instead of simply the top 5.

If there are more or less than 5 in the certainty range I implement body-type consensus

In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from custom.tools import pickle_variable_to_path

### Body-type clustering

In [32]:
WORKING_DIR = os.getcwd()
OPEN_DIR = os.path.join(WORKING_DIR, 'data', 'notebooks', '5_model_validation')
DATA_DIR = os.path.join(WORKING_DIR, 'data', 'InceptionV3')
LOOKUP_PATH = os.path.join(DATA_DIR, 'I_15_lookup_dict.pkl')

with open(os.path.join(OPEN_DIR, 'label_category_dict_corrected.pkl'), 'rb') as f:
    label_category_dict = pickle.load(f)
    
with open(os.path.join(OPEN_DIR, 'test_y_true.pkl'), 'rb') as f:
    test_y_true = pickle.load(f)

with open(os.path.join(OPEN_DIR, 'test_y_pred.pkl'), 'rb') as f:
    test_y_pred = pickle.load(f)
    
with open(os.path.join(LOOKUP_PATH), 'rb') as f:
    lookup_dict = pickle.load(f)

In [63]:
def find_top_percent_prediction_indices(prediction_array_row, **kwargs):
    """
    prediction array row should be shape (classes,)
    """
    # sort them from greatest to lowest
    top_idxs = np.argsort(prediction_array_row)[::-1]
#     print(top_idxs)
    
    # only select ones that add up to 99% certainty
    selected = []
    certainty = 0
    i = 0
    while certainty <= kwargs['percent']/100:
#         print(top_idxs[i])
        selected.append(top_idxs[i])
        certainty += prediction_array_row[top_idxs[i]]
        i += 1
    return selected

# show that it worked
test_i = 1
print(test_y_pred[test_i, find_top_percent_prediction_indices(test_y_pred[test_i,:], percent=95)])

[ 0.42262146  0.31554991  0.16753168  0.06199163]


In [62]:
def find_top_k_prediction_indices(prediction_array_row, **kwargs):
    """
    prediction array row should be shape (classes,)
    """
    # find the indices of the top 5 predictions for each row
    top_idxs = np.argpartition(prediction_array_row, -(kwargs['k']+1))[-(kwargs['k']):]
    # sort them from greatest to lowest
    top_idxs = top_idxs[np.argsort(prediction_array_row[top_idxs])][::-1]
    return top_idxs

# show that it worked
print(test_y_pred[0, find_top_k_prediction_indices(test_y_pred[0,:], k=3)])

[  9.99222636e-01   7.47231010e-04   2.20297807e-05]


In [None]:
def make_pred_dict_list(y_true, y_pred, lookup_dict, label_category_dict, index_finder, **kwargs):
    prediction_dict_list = []
    for row in range(y_true.shape[0]):
        prediction_dict = {'pred_labels': [], 'pred_bodies': []}
        true_idx = np.argmax(y_true[row,:])
        true_label = lookup_dict[true_idx]
        prediction_dict['true_label'] = true_label
        prediction_dict['true_body'] = label_category_dict[true_label]['body_style']
        top_idxs = index_finder(y_pred[row,:], **kwargs)
        for i in top_idxs:
            pred_label = lookup_dict[i]
            pred_body = label_category_dict[pred_label]['body_style']
            prediction_dict['pred_labels'].append(pred_label)
            prediction_dict['pred_bodies'].append(pred_body)
        prediction_dict_list.append(prediction_dict)
    return prediction_dict_list

In [125]:
### Change a pred_dict_list to remove non-consensus body types
def keep_consensus_body(pred_dict_list):
    import collections
    import copy
    pred_dict_list_copy = copy.deepcopy(pred_dict_list)
    for pred_dict in pred_dict_list_copy:
        body_count = collections.Counter(pred_dict['pred_bodies'])
        most_common_body = body_count.most_common(2)
        most_common_body = [tup[0] for tup in most_common_body]
        pred_dict['consensus_body'] = most_common_body
        pred_dict['consensus_labels'] = []
        for i, label in enumerate(pred_dict['pred_labels']):
            if pred_dict['pred_bodies'][i] in most_common_body:
                pred_dict['consensus_labels'].append(label)
    return pred_dict_list_copy

In [166]:
top_5_pred_dict_list = make_pred_dict_list(
    test_y_true, test_y_pred,
    lookup_dict, label_category_dict,
    find_top_k_prediction_indices, k=5
)

consensus_top_5 = keep_consensus_body(top_5_pred_dict_list)

In [180]:
def characterize_pred_list(prediction_dict_list,
                           prediction_label, 
                           consensus=False):
    correct_id_body = 0
    correct_id = 0
    incorrect = 0
    wrong_body_groupings = []
    for prediction_dict in prediction_dict_list:
        pred_body_set = sorted(list(set(prediction_dict['pred_bodies'])))
        # correct
        if prediction_dict['true_label'] in prediction_dict[prediction_label]:
            if len(pred_body_set) == 1:
                correct_id_body += 1
            else:
                correct_id += 1
                if consensus:
                    wrong_body_groupings.append((prediction_dict['consensus_body'], prediction_dict))
                else:
                    wrong_body_groupings.append((pred_body_set, prediction_dict))
        # incorrect
        else:
            incorrect += 1
    
    wrong_body_set = []
    tracker = []
    for group in wrong_body_groupings:
        test_group = sorted(group[0])
        if test_group not in tracker:
            tracker.append(test_group)
            wrong_body_set.append(group)
    
    total = (correct_id_body + correct_id + incorrect)
    assert total==len(prediction_dict_list), 'total is wrong'
    print('percent correct: {}'.format(round(correct_id_body/total, 3)))
    print('percent correct with wrong body in results: {}'.format(round(correct_id/total, 3)))
    print('percent incorrect: {}'.format(round(incorrect/total, 3)))
    print('number of wrong body groupings: {}' .format(len(wrong_body_set)))
    
    return wrong_body_set, tracker

In [181]:
percent_pred_dict_list = make_pred_dict_list(
    test_y_true, test_y_pred,
    lookup_dict, label_category_dict,
    find_top_percent_prediction_indices, percent=95
)

consensus_percent = keep_consensus_body(percent_pred_dict_list)

In [176]:
percent_wrong_body, percent_tracker =\
characterize_pred_list(percent_pred_dict_list, 'pred_labels')

percent correct: 0.69
percent correct with wrong body in results: 0.23
percent incorrect: 0.08
number of wrong body groupings: 102


In [182]:
percent_wrong_body, percent_tracker =\
characterize_pred_list(consensus_percent, 'consensus_labels', consensus=True)

percent correct: 0.692
percent correct with wrong body in results: 0.215
percent incorrect: 0.092
number of wrong body groupings: 30


In [178]:
percent_wrong_body

[(['COUPE', 'CONVERTIBLE'],
  {'consensus_body': ['COUPE', 'CONVERTIBLE'],
   'consensus_labels': ['FIAT-500_Convertible-2012',
    'Acura-Integra_Type_R-2001',
    'Chevrolet-Cobalt_SS-2010',
    'BMW-M3_Coupe-2012'],
   'pred_bodies': ['CONVERTIBLE', 'COUPE', 'COUPE', 'COUPE'],
   'pred_labels': ['FIAT-500_Convertible-2012',
    'Acura-Integra_Type_R-2001',
    'Chevrolet-Cobalt_SS-2010',
    'BMW-M3_Coupe-2012'],
   'true_body': 'COUPE',
   'true_label': 'Acura-Integra_Type_R-2001'}),
 (['SEDAN', 'COUPE'],
  {'consensus_body': ['SEDAN', 'COUPE'],
   'consensus_labels': ['Audi-100_Sedan-1994',
    'Audi-V8_Sedan-1994',
    'Mercedes_Benz-C_Class_Sedan-2012',
    'Audi-TTS_Coupe-2012',
    'Audi-S5_Coupe-2012'],
   'pred_bodies': ['SEDAN',
    'SEDAN',
    'SEDAN',
    'PICKUP',
    'COUPE',
    'COUPE',
    'CONVERTIBLE'],
   'pred_labels': ['Audi-100_Sedan-1994',
    'Audi-V8_Sedan-1994',
    'Mercedes_Benz-C_Class_Sedan-2012',
    'Ford-Ranger_SuperCab-2011',
    'Audi-TTS_Coupe-20

In [183]:
top_5_wrong_body, top_5_tracker =\
characterize_pred_list(top_5_pred_dict_list, 'pred_labels')

percent correct: 0.158
percent correct with wrong body in results: 0.79
percent incorrect: 0.052
number of wrong body groupings: 152


In [184]:
top_5_wrong_body, top_5_tracker =\
characterize_pred_list(consensus_top_5, 'consensus_labels')

percent correct: 0.158
percent correct with wrong body in results: 0.766
percent incorrect: 0.076
number of wrong body groupings: 152
