# Various checks of model and metrics

In [1]:
import os 
import sys
import numpy as np

sys.path.append("../submissions/keras_cnns_pretrained")
sys.path.append("../")

In [2]:
from rampwf.workflows.image_classifier import BatchGeneratorBuilder

In [3]:
from image_preprocessor import transform, transform_test
from problem import get_cv, get_train_data, Predictions, score_types

## Load data and score functions with rampwf

In [4]:
n_classes = 403
batch_size = 16

In [5]:
# Load the training set and materialise the CV splits as a list.
folder_X_train, y_train = get_train_data(path="..")
cv = list(get_cv(folder_X_train, y_train))

# Only the first fold is used below: build its batch generator, then stop.
for fold_i, (train_is, valid_is) in enumerate(cv):
    folder, X_train = folder_X_train
    gen = BatchGeneratorBuilder(
        X_array=X_train[train_is],
        y_array=y_train[train_is],
        transform_img=transform,
        transform_test_img=transform_test,
        folder=folder,
        chunk_size=batch_size * 5,
        n_classes=n_classes,
        n_jobs=1,
    )
    break

In [6]:
ground_truth_train_train = Predictions(y_true=y_train[train_is])

In [7]:
score_types

[<rampwf.score_types.accuracy.Accuracy at 0x7f55eff505c0>,
 <rampwf.score_types.negative_log_likelihood.NegativeLogLikelihood at 0x7f55eff505f8>,
 <rampwf.score_types.f1_above.F1Above at 0x7f55eff50630>]

In [8]:
score_types[-1].score_function(ground_truth_train_train, ground_truth_train_train)

1.0

In [9]:
def score_function(self, ground_truths, predictions, valid_indexes=None):
    """Score ``predictions`` against ``ground_truths`` on their label indices.

    Records ``ground_truths.label_names`` on ``self``, selects the
    ``y_pred_label_index`` entries of both arguments with ``valid_indexes``
    (every entry when it is ``None``), passes the pair through
    ``self.check_y_pred_dimensions`` and returns the result of
    ``self.__call__`` on (true indices, predicted indices).
    """
    self.label_names = ground_truths.label_names
    selector = slice(None) if valid_indexes is None else valid_indexes
    predicted_idx = predictions.y_pred_label_index[selector]
    true_idx = ground_truths.y_pred_label_index[selector]
    self.check_y_pred_dimensions(true_idx, predicted_idx)
    return self.__call__(true_idx, predicted_idx)


In [10]:
valid_indexes = slice(None, None, None)

ground_truth_train_train.y_pred_label_index[valid_indexes]

array([159,  57,  84, ...,  35,  15,  95])

## Compare f1above, f1_score and fbeta_score (beta=1)

### Example of incorrect predictions on rare classes

In [40]:
from sklearn.metrics import f1_score, fbeta_score
from rampwf.score_types.f1_above import F1Above

In [144]:
y_true_labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 5]
y_pred_labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [145]:
f1_score(y_true_labels, y_pred_labels, average=None), fbeta_score(y_true_labels, y_pred_labels, beta=1, average=None) 

  'precision', 'predicted', average, warn_for)


(array([ 1.        ,  0.72727273,  0.        ,  0.        ,  0.        ,  0.        ]),
 array([ 1.        ,  0.72727273,  0.        ,  0.        ,  0.        ,  0.        ]))

In [146]:
f1above = F1Above(threshold=0.7, precision=3)

In [147]:
f1above(y_true_labels, y_pred_labels)

  'precision', 'predicted', average, warn_for)


0.3333333333333333

### Another example 

In [114]:
nb_classes = 10

In [115]:
# One-hot ground truth: sample i belongs to class i.
y_true = np.eye(nb_classes, dtype=np.uint8)

# One-hot predictions: each sample gets its own class or a neighbouring one
# (i-1, i or i+1, clamped to the valid class range).
# np.random.randint excludes `high`, hence the `+ 1` on the upper bound;
# without it class i+1 could never be drawn, the last class could never be
# predicted correctly, and nb_classes == 1 would raise (low == high).
# NOTE: the draws are unseeded, so y_pred differs from run to run.
y_pred = np.zeros((nb_classes, nb_classes), dtype=np.uint8)
for i in range(nb_classes):
    j = np.random.randint(max(i - 1, 0), min(i + 1, nb_classes - 1) + 1)
    y_pred[i, j] = 1

In [116]:
print(y_true, '\n', y_pred)

[[1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 1]] 
 [[1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 1 0]]


In [117]:
# Collapse each row to the index of its positive entry (rows are one-hot in
# this example), giving flat label vectors.
y_true_labels = np.array([np.flatnonzero(row > 0) for row in y_true]).ravel()
y_pred_labels = np.array([np.flatnonzero(row > 0) for row in y_pred]).ravel()

In [118]:
y_pred_labels, y_true_labels

(array([0, 0, 1, 2, 4, 5, 6, 7, 7, 8]), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))

In [119]:
f1above(y_true, y_pred), f1above(y_true_labels, y_pred_labels)

  'precision', 'predicted', average, warn_for)


(0.3, 0.3)

### How it is computed

In [120]:
f1above??

In [121]:
res = f1_score(y_true_labels, y_pred_labels, average=None)
res

  'precision', 'predicted', average, warn_for)


array([ 0.66666667,  0.        ,  0.        ,  0.        ,  1.        ,
        1.        ,  1.        ,  0.66666667,  0.        ,  0.        ])

In [122]:
len(res[res>0.7]) / len(res) 

0.3

In [123]:
res = fbeta_score(y_true_labels, y_pred_labels, beta=1, average=None)
res

  'precision', 'predicted', average, warn_for)


array([ 0.66666667,  0.        ,  0.        ,  0.        ,  1.        ,
        1.        ,  1.        ,  0.66666667,  0.        ,  0.        ])

In [124]:
len(res[res>0.7]) /  len(res) 

0.3

In [127]:
res = f1_score(y_true, y_pred, average=None)
res

  'precision', 'predicted', average, warn_for)


array([ 0.66666667,  0.        ,  0.        ,  0.        ,  1.        ,
        1.        ,  1.        ,  0.66666667,  0.        ,  0.        ])

In [128]:
len(res[res>0.7]) / len(res) 

0.3

In [131]:
from sklearn.metrics import precision_recall_fscore_support
precision_recall_fscore_support??

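Compute the per-class F1 by hand from the true-positive (TP), false-positive (FP) and false-negative (FN) counts: $F_1 = 2\,TP / \big((TP + FP) + (TP + FN)\big)$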

In [161]:
# Per-class column sums of the binarised indicators:
#   TP      from y_true * y_pred (predicted and true),
#   TP + FP from y_pred          (everything predicted as the class),
#   TP + FN from y_true          (everything that truly is the class).
TP, TP_FP, TP_FN = (
    np.round(np.clip(indicator, 0, 1)).sum(axis=0)
    for indicator in (y_true * y_pred, y_pred, y_true)
)

In [162]:
TP, TP_FP, TP_FN

(array([1, 0, 0, 0, 1, 1, 1, 1, 0, 0], dtype=uint64),
 array([2, 1, 1, 0, 1, 1, 1, 2, 1, 0], dtype=uint64),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint64))

In [158]:
y_pred

array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]], dtype=uint8)

In [163]:
2.0*TP / (TP_FP + TP_FN)

array([ 0.66666667,  0.        ,  0.        ,  0.        ,  1.        ,
        1.        ,  1.        ,  0.66666667,  0.        ,  0.        ])

In [208]:
np.clip((2.0*TP / (TP_FP + TP_FN) - 0.7)*10.0, 0.0, 1.0)

array([ 0.,  0.,  0.,  0.,  1.,  1.,  1.,  0.,  0.,  0.])

## Implement f170 metric in Keras

In [91]:
from keras import backend as K

In [209]:
def f170(y_true, y_pred, threshold=0.7):
    """Per-class F1 and the fraction of classes whose F1 exceeds ``threshold``.

    Parameters
    ----------
    y_true : tensor
        Ground-truth indicators, reduced over axis 0 (assumed to be
        (n_samples, n_classes) one-hot as in the example cells -- TODO confirm
        for other callers).
    y_pred : tensor
        Predictions in the same layout; values are clipped to [0, 1] and
        rounded before counting.
    threshold : float, optional
        A class counts towards the score when its F1 is strictly greater
        than this value. Defaults to 0.7.

    Returns
    -------
    (f1, score)
        ``f1`` holds one F1 value per class; ``score`` is the fraction of
        classes with ``f1 > threshold``.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)), axis=0)
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)), axis=0)
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)), axis=0)

    # F1 = 2*TP / ((TP + FP) + (TP + FN)); epsilon avoids 0/0 for classes that
    # are neither predicted nor present.
    numer = 2.0 * true_positives
    denom = predicted_positives + possible_positives + K.epsilon()
    f1 = numer / denom

    # Compare with the threshold directly. The previous
    # round(clip((f1 - 0.7) * 10, 0, 1)) only reached 1 at about f1 >= 0.75,
    # so classes with F1 in (0.7, 0.75) were silently left out.
    above = K.cast(K.greater(f1, threshold), K.floatx())
    # Mean over the class axis == count / n_classes; needs no static shape.
    score = K.mean(above, axis=0)
    return f1, score

In [210]:
Y_true = K.placeholder(shape=(None, nb_classes), dtype='float32')
Y_pred = K.placeholder(shape=(None, nb_classes), dtype='float32')
f170_score = f170(Y_true, Y_pred)

In [211]:
print(y_true, '\n', y_pred)

[[1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 1]] 
 [[1 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 1 0]]


In [212]:
sess = K.get_session()
res = sess.run(f170_score, feed_dict={Y_true: y_true, Y_pred: y_pred})

In [213]:
res

(array([ 0.66666669,  0.        ,  0.        ,  0.        ,  1.        ,
         1.        ,  1.        ,  0.66666669,  0.        ,  0.        ], dtype=float32),
 0.30000001)