## Applied Machine Learning: Module 3 (Evaluation)

In [1]:
%matplotlib notebook
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

dataset = load_digits()
X, y = dataset.data, dataset.target

for class_name, class_count in zip(dataset.target_names, np.bincount(dataset.target)):
    print(class_name, class_count)

0 178
1 182
2 177
3 183
4 181
5 182
6 181
7 179
8 174
9 180


In [2]:
y_binary_imbalanced = y.copy()
y_binary_imbalanced[y_binary_imbalanced != 1] = 0

print('Original labels:\t', y[1:30])
print('New binary labels:\t', y_binary_imbalanced[1:30])

Original labels:	 [1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
New binary labels:	 [1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]


In [3]:
np.bincount(y_binary_imbalanced)    # Negative class (0) is the most frequent class

array([1615,  182])

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

from sklearn.svm import SVC
svm = SVC(kernel='rbf', C=1).fit(X_train, y_train)
svm.score(X_test, y_test)

0.90888888888888886

## Dummy Classifier

In [5]:
from sklearn.dummy import DummyClassifier

dummy_majority = DummyClassifier(strategy='most_frequent').fit(X_train, y_train)
y_dummy_prediction = dummy_majority.predict(X_test)
y_dummy_prediction

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0,

In [6]:
y_dummy_score = dummy_majority.score(X_test, y_test)
y_dummy_score

0.9044444444444445

In [7]:
svm = SVC(kernel='linear', C=1).fit(X_train, y_train)
svm.score(X_test, y_test)

0.97777777777777775

## Confusion Matrix

In [8]:
from sklearn.metrics import confusion_matrix

dummy_majority = DummyClassifier(strategy='most_frequent').fit(X_train, y_train)
y_preidcted_majority = dummy_majority.predict(X_test)
confusion_matrix(y_test, y_preidcted_majority)

array([[407,   0],
       [ 43,   0]])

In [9]:
# produces random predictions w/ same class proportion as training set
dummy_classprop = DummyClassifier(strategy='stratified').fit(X_train, y_train)
y_classprop_predicted = dummy_classprop.predict(X_test)
confusion = confusion_matrix(y_test, y_classprop_predicted)

print('Random class-proportional prediction (dummy classifier)\n', confusion)

Random class-proportional prediction (dummy classifier)
 [[362  45]
 [ 38   5]]


In [10]:
svm = SVC(kernel='linear', C=1).fit(X_train, y_train)
svm_predicted = svm.predict(X_test)
confusion = confusion_matrix(y_test, svm_predicted)

print('Support vector machine classifier (linear kernel, C=1)\n', confusion)

Support vector machine classifier (linear kernel, C=1)
 [[402   5]
 [  5  38]]


In [11]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression().fit(X_train, y_train)
lr_predicted = lr.predict(X_test)
confusion = confusion_matrix(y_test, lr_predicted)

print('Logistic regression classifier (default settings)\n', confusion)

Logistic regression classifier (default settings)
 [[401   6]
 [  6  37]]


In [12]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(max_depth=2).fit(X_train, y_train)
tree_predicted = dt.predict(X_test)
confusion = confusion_matrix(y_test, tree_predicted)

print('Decision tree classifier (max_depth = 2)\n', confusion)

Decision tree classifier (max_depth = 2)
 [[400   7]
 [ 17  26]]


## Evaluation metrics

In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

print("Accuracy Score is : ", accuracy_score(y_test, tree_predicted))
print("Precission Score is : ", precision_score(y_test, tree_predicted))
print("Recall Score is : ", recall_score(y_test, tree_predicted))
print("F1 Score is : ", f1_score(y_test, tree_predicted))


Accuracy Score is :  0.946666666667
Precission Score is :  0.787878787879
Recall Score is :  0.604651162791
F1 Score is :  0.684210526316


In [14]:
from sklearn.metrics import classification_report

print(classification_report(y_test, tree_predicted))

             precision    recall  f1-score   support

          0       0.96      0.98      0.97       407
          1       0.79      0.60      0.68        43

avg / total       0.94      0.95      0.94       450



In [15]:
print(classification_report(y_test, y_preidcted_majority))

             precision    recall  f1-score   support

          0       0.90      1.00      0.95       407
          1       0.00      0.00      0.00        43

avg / total       0.82      0.90      0.86       450



  'precision', 'predicted', average, warn_for)


In [16]:
print(classification_report(y_test, y_classprop_predicted))

             precision    recall  f1-score   support

          0       0.91      0.89      0.90       407
          1       0.10      0.12      0.11        43

avg / total       0.83      0.82      0.82       450



In [17]:
print(classification_report(y_test, svm_predicted))

             precision    recall  f1-score   support

          0       0.99      0.99      0.99       407
          1       0.88      0.88      0.88        43

avg / total       0.98      0.98      0.98       450



In [18]:
print(classification_report(y_test, lr_predicted))

             precision    recall  f1-score   support

          0       0.99      0.99      0.99       407
          1       0.86      0.86      0.86        43

avg / total       0.97      0.97      0.97       450



## Classification functions 

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)
y_scores_lr = lr.fit(X_train, y_train).decision_function(X_test)
y_score_list = list(zip(y_test[0:20], y_scores_lr[0:20]))

# show the decision_function scores for first 20 instances
y_score_list

[(0, -23.177243630386034),
 (0, -13.541549223553204),
 (0, -21.722926237979213),
 (0, -18.90720273290988),
 (0, -19.735584374998727),
 (0, -9.7499338862329576),
 (1, 5.2349410704476753),
 (0, -19.307743891900014),
 (0, -25.101161656450504),
 (0, -21.827262937646445),
 (0, -24.151364573570763),
 (0, -19.576993612457002),
 (0, -22.574367253406553),
 (0, -10.823029971175339),
 (0, -11.911834087854848),
 (0, -10.978939587983268),
 (1, 11.206158779702218),
 (0, -27.646017082032056),
 (0, -12.859101512364315),
 (0, -25.848771385365538)]

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)
y_proba_lr = lr.fit(X_train, y_train).predict_proba(X_test)
y_proba_list = list(zip(y_test[0:20], y_proba_lr[0:20,1]))

# show the probability of positive class for first 20 instances
y_proba_list

[(0, 8.5966821100676432e-11),
 (0, 1.3150845182640114e-06),
 (0, 3.6809293584440439e-10),
 (0, 6.1480589646878529e-09),
 (0, 2.6850110883930215e-09),
 (0, 5.8303294565776929e-05),
 (1, 0.99470289874398121),
 (0, 4.1190593697416615e-09),
 (0, 1.2552674825668184e-11),
 (0, 3.3145658773940304e-10),
 (0, 3.2462968543970861e-11),
 (0, 3.1461459235748177e-09),
 (0, 1.570692010843091e-10),
 (0, 1.9935960414643117e-05),
 (0, 6.712323899653738e-06),
 (0, 1.7059538216009643e-05),
 (1, 0.99998640733985455),
 (0, 9.8514854570637491e-13),
 (0, 2.6009718607086267e-06),
 (0, 5.9432702865041518e-12)]

## Precision Recall Curve

In [23]:
from sklearn.metrics import precision_recall_curve

precision, recall, thresolds = precision_recall_curve(y_test, y_scores_lr)
closest_zero = np.argmin(np.abs(thresolds))
closest_zero_p = precision[closest_zero]
closest_zero_r = recall[closest_zero]

plt.figure()
plt.xlim([0.0, 1.01])
plt.ylim([0.0, 1.01])

plt.plot(precision, recall, label='Precision-Recall Curve')
plt.plot(closest_zero_p, closest_zero_r, 'o', markersize = 12, fillstyle = 'none', c='r', mew=3)
plt.xlabel('Precision', fontsize=16)
plt.ylabel('Recall', fontsize=16)
plt.axes().set_aspect('equal')
plt.show()

<IPython.core.display.Javascript object>



## ROC Curve, Area Under Curve

In [26]:
from sklearn.metrics import roc_curve, auc

X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)
y_scores_lr = lr.fit(X_train, y_train).decision_function(X_test)

fpr_lr, tpr_lr, _ = roc_curve(y_test, y_scores_lr)
roc_auc_lr = auc(fpr_lr, tpr_lr)

plt.figure()
plt.xlim([-0.01, 1.00])
plt.ylim([-0.01, 1.00])

plt.plot(fpr_lr, tpr_lr, lw=3, label='LogRegr ROC curve (area = {:0.2f})'.format(roc_auc_lr))
plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate', fontsize=16)
plt.title('ROC curve (1-of-10 digits classifier)', fontsize=16)
plt.legend(loc='lower right', fontsize=13)
plt.plot([0, 1], [0, 1], color='navy', lw=3, linestyle='--')
plt.axes().set_aspect('equal')
plt.show()

<IPython.core.display.Javascript object>



In [29]:
from matplotlib import cm

X_train, X_test, y_train, y_test = train_test_split(X, y_binary_imbalanced, random_state=0)

plt.figure()
plt.xlim([-0.01, 1.00])
plt.ylim([-0.01, 1.00])
for g in [0.01, 0.1, 0.20, 1]:
    svm = SVC(gamma=g).fit(X_train, y_train)
    y_scores_svm = svm.decision_function(X_test)
    fpr_svm, tpr_svm, _ = roc_curve(y_test, y_scores_svm)
    roc_auc_svm = auc(fpr_svm, tpr_svm)
    accuracy_svm = svm.score(X_test, y_test)
    print("gamma = {:.2f}  accuracy = {:.2f}   AUC = {:.2f}".format(g, accuracy_svm, 
                                                                    roc_auc_svm))
    plt.plot(fpr_svm, tpr_svm, lw=3, alpha=0.7, 
             label='SVM (gamma = {:0.2f}, area = {:0.2f})'.format(g, roc_auc_svm))
    
plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate (Recall)', fontsize=16)
plt.plot([0, 1], [0, 1], color='k', lw=0.5, linestyle='--')
plt.legend(loc="lower right", fontsize=11)
plt.title('ROC curve: (1-of-10 digits classifier)', fontsize=16)
plt.axes().set_aspect('equal')

plt.show()

<IPython.core.display.Javascript object>

gamma = 0.01  accuracy = 0.91   AUC = 1.00
gamma = 0.10  accuracy = 0.90   AUC = 0.98
gamma = 0.20  accuracy = 0.90   AUC = 0.66
gamma = 1.00  accuracy = 0.90   AUC = 0.50




## Multi class confusion matrix

In [39]:
from sklearn.metrics import confusion_matrix

dataset = load_digits()
X, y = dataset.data, dataset.target

X_train_mc , X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y, random_state=0)

svm = SVC(kernel='linear').fit(X_train_mc, y_train_mc)
svm_predicted_mc = svm.predict(X_test_mc)

confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc)

df_cm = pd.DataFrame(confusion_mc, index=[i for i in range(0, 10)], columns=[i for i in range(0, 10)])
plt.figure(figsize=(5.5, 4))
sns.heatmap(df_cm, annot=True)
plt.title('SVM Linear Kernel \nAccuracy:{0:.3f}'.format(accuracy_score(y_test_mc, 
                                                                 svm_predicted_mc)))
plt.ylabel('True label')
plt.xlabel('Predicted label')

svm = SVC(kernel='rbf').fit(X_train_mc, y_train_mc)
svm_predicted_mc = svm.predict(X_test_mc)
confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc)
df_cm = pd.DataFrame(confusion_mc, index=[i for i in range(0, 10)], columns=[i for i in range(0, 10)])

plt.figure(figsize=(5.5, 4))
sns.heatmap(df_cm, annot=True)
plt.title('SVM RBF Kernel \n Accuracy: {0:.3f}'.format(accuracy_score(y_test_mc, svm_predicted_mc)))
plt.ylabel('True label')
plt.xlabel('Predicted label')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0.5,14.7222,'Predicted label')

## Multi Class Classification Report

In [40]:
print(classification_report(y_test_mc, svm_predicted_mc))

             precision    recall  f1-score   support

          0       1.00      0.65      0.79        37
          1       1.00      0.23      0.38        43
          2       1.00      0.39      0.56        44
          3       1.00      0.93      0.97        45
          4       0.14      1.00      0.25        38
          5       1.00      0.33      0.50        48
          6       1.00      0.54      0.70        52
          7       1.00      0.35      0.52        48
          8       1.00      0.02      0.04        48
          9       1.00      0.55      0.71        47

avg / total       0.93      0.49      0.54       450



## Micro Average vs Macro Average precision 

In [41]:
print("Micro Average Precision = {:.2f}".format(precision_score(y_test_mc, svm_predicted_mc, average='micro')))
print("Macro Average Precision = {:.2f}".format(precision_score(y_test_mc, svm_predicted_mc, average='macro')))

Micro Average Precision = 0.49
Macro Average Precision = 0.91


In [42]:
print("Micro Average F1 = {:.2f}".format(f1_score(y_test_mc, svm_predicted_mc, average='micro')))
print("Macro Average F1 = {:.2f}".format(f1_score(y_test_mc, svm_predicted_mc, average='macro')))

Micro Average F1 = 0.49
Macro Average F1 = 0.54


## Regression Evaluation metrics

In [46]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.dummy import DummyRegressor

diabetes = datasets.load_diabetes()
X = diabetes.data[:, None, 6]
y = diabetes.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

lm = LinearRegression().fit(X_train, y_train)
lm_dummy_mean = DummyRegressor(strategy = 'mean').fit(X_train, y_train)

y_predict = lm.predict(X_test)
y_dummy_mean_predict = lm_dummy_mean.predict(X_test)

print("Linear model coefficients: ".format(lm.coef_))
print("Mean Squared Error ( dummy ) = {:.2f}".format(mean_squared_error(y_test, y_dummy_mean_predict)))
print("Mean Squared Error ( linear model ) = {:.2f}".format(mean_squared_error(y_test, y_predict)))
print("r2_score (dummy) = {:.2f}".format(r2_score(y_test, y_predict)))
print("r2_score (linear model) = {:.2f}".format(r2_score(y_test, y_dummy_mean_predict)))

plt.scatter(X_test, y_test, color='black')
plt.plot(X_test, y_predict, color='green', linewidth=2)
plt.plot(X_test, y_dummy_mean_predict, color='red', linestyle='dashed', linewidth=2, label='dummy')
plt.show()

Linear model coefficients: 
Mean Squared Error ( dummy ) = 4965.13
Mean Squared Error ( linear model ) = 4646.74
r2_score (dummy) = 0.06
r2_score (linear model) = -0.00


<IPython.core.display.Javascript object>